diff --git a/.gitignore b/.gitignore index 16d03d4a193..0a0a9f2c3fe 100644 --- a/.gitignore +++ b/.gitignore @@ -88,6 +88,14 @@ GSYMS /tools/openfst-1.3.4/ /tools/openfst-1.4.1.tar.gz /tools/openfst-1.4.1/ +/tools/openfst-1.5.4.tar.gz +/tools/openfst-1.5.4/ +/tools/openfst-1.6.0.tar.gz +/tools/openfst-1.6.0/ +/tools/openfst-1.6.1.tar.gz +/tools/openfst-1.6.1/ +/tools/openfst-1.6.2.tar.gz +/tools/openfst-1.6.2/ /tools/pa_stable_v19_20111121.tgz /tools/portaudio/ /tools/sctk-2.4.0-20091110-0958.tar.bz2 @@ -114,6 +122,7 @@ GSYMS /tools/pthreads /tools/pthreads*.zip /tools/sequitur +/tools/sequitur-g2p /tools/srilm.tgz /tools/liblbfgs-1.10.tar.gz /tools/liblbfgs-1.10/ @@ -124,4 +133,3 @@ GSYMS /tools/sequitur-g2p/ /kaldiwin_vs* - diff --git a/.travis.yml b/.travis.yml index 85bbc7a52e4..f8e2bac0362 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,16 +10,19 @@ addons: apt: sources: - ubuntu-toolchain-r-test + - llvm-toolchain-precise-3.8 packages: - gdb - gcc-4.9 - g++-4.9 - gfortran-4.9 - liblapack-dev + - clang-3.8 branches: only: - master + - shortcut before_install: - cat /proc/sys/kernel/core_pattern @@ -27,7 +30,7 @@ before_install: - tools/extras/travis_install_bindeps.sh $XROOT script: - - CXX=g++-4.9 + - CXX=clang++-3.8 CFLAGS="-march=native" LDFLAGS="-llapack" INCDIRS="$XROOT/usr/include" diff --git a/README.md b/README.md index 32d4945a909..73abe9f1e3f 100644 --- a/README.md +++ b/README.md @@ -40,25 +40,30 @@ Development pattern for contributors ------------------------------------ 1. [Create a personal fork](https://help.github.com/articles/fork-a-repo/) - of the [main Kaldi repository] (https://github.com/kaldi-asr/kaldi) in GitHub. + of the [main Kaldi repository](https://github.com/kaldi-asr/kaldi) in GitHub. 2. Make your changes in a named branch different from `master`, e.g. you create a branch `my-awesome-feature`. 3. [Generate a pull request](https://help.github.com/articles/creating-a-pull-request/) through the Web interface of GitHub. -4. As a general rule, please follow [Google C++ Style Guide] - (https://google.github.io/styleguide/cppguide.html). +4. As a general rule, please follow [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). There are a [few exceptions in Kaldi](http://kaldi-asr.org/doc/style.html). - You can use the [Google's cpplint.py] - (https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py) + You can use the [Google's cpplint.py](https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py) to verify that your code is free of basic mistakes. Platform specific notes ----------------------- -PowerPC 64bits little-endian (ppc64le): +### PowerPC 64bits little-endian (ppc64le) + - Kaldi is expected to work out of the box in RHEL >= 7 and Ubuntu >= 16.04 with OpenBLAS, ATLAS, or CUDA. -- CUDA drivers for ppc64le can be found at [https://developer.nvidia.com/cuda-downloads] - (https://developer.nvidia.com/cuda-downloads). -- An [IBM Redbook] (https://www.redbooks.ibm.com/abstracts/redp5169.html) is +- CUDA drivers for ppc64le can be found at [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads). +- An [IBM Redbook](https://www.redbooks.ibm.com/abstracts/redp5169.html) is available as a guide to install and configure CUDA. + +### Android + +- Kaldi supports cross compiling for Android using Android NDK, clang++ and + OpenBLAS. +- See [this blog post](http://jcsilva.github.io/2017/03/18/compile-kaldi-android/) + for details. 
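As a quick illustration of the contributor workflow that the README changes above describe (personal fork, named feature branch, pull request, optional cpplint check), here is a minimal sketch. The fork URL, user name, and branch name below are placeholders for illustration only and are not part of this patch.

```bash
# Minimal sketch of the Kaldi contribution workflow from the README above.
# "YOURUSER" and "my-awesome-feature" are placeholder names, not values from this patch.
git clone https://github.com/YOURUSER/kaldi.git          # clone your personal fork
cd kaldi
git remote add upstream https://github.com/kaldi-asr/kaldi.git
git checkout -b my-awesome-feature                       # work on a named branch, not master
# ... edit sources, commit ...
python cpplint.py src/somedir/some-file.cc               # optional: check against the style guide
git push origin my-awesome-feature                       # then open a pull request on GitHub
```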
diff --git a/egs/ami/s5/local/ami_ihm_data_prep.sh b/egs/ami/s5/local/ami_ihm_data_prep.sh index 3a1d43d1ea1..b3ec1723713 100755 --- a/egs/ami/s5/local/ami_ihm_data_prep.sh +++ b/egs/ami/s5/local/ami_ihm_data_prep.sh @@ -69,7 +69,7 @@ sed -e 's?.*/??' -e 's?.wav??' $dir/wav.flist | \ awk '{print $2}' $dir/segments | sort -u | join - $dir/wav1.scp > $dir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # (1d) reco2file_and_channel cat $dir/wav.scp \ diff --git a/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh b/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh index c3b9914d7a0..b69732a61eb 100755 --- a/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh +++ b/egs/ami/s5/local/ami_ihm_scoring_data_prep.sh @@ -68,7 +68,7 @@ sed -e 's?.*/??' -e 's?.wav??' $dir/wav.flist | \ awk '{print $2}' $dir/segments | sort -u | join - $dir/wav1.scp > $dir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # (1d) reco2file_and_channel cat $dir/wav.scp \ diff --git a/egs/ami/s5/local/ami_mdm_data_prep.sh b/egs/ami/s5/local/ami_mdm_data_prep.sh index bc7e4180b4a..2cc973cb2d5 100755 --- a/egs/ami/s5/local/ami_mdm_data_prep.sh +++ b/egs/ami/s5/local/ami_mdm_data_prep.sh @@ -75,7 +75,7 @@ awk '{print $1}' $dir/wav2.scp | join -2 2 - $dir/segments | \ awk '{print $1}' $dir/segments | join - $dir/text > $dir/t; mv $dir/t $dir/text #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp #prep reco2file_and_channel cat $dir/wav.scp | \ diff --git a/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh b/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh index ab0fd185f70..8d9e24a9838 100755 --- a/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh +++ b/egs/ami/s5/local/ami_mdm_scoring_data_prep.sh @@ -67,7 +67,7 @@ sed -e 's?.*/??' -e 's?.wav??' 
$tmpdir/wav.flist | \ awk '{print $2}' $tmpdir/segments | sort -u | join - $tmpdir/wav1.scp > $tmpdir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp #prep reco2file_and_channel cat $tmpdir/wav.scp | \ diff --git a/egs/ami/s5/local/ami_sdm_data_prep.sh b/egs/ami/s5/local/ami_sdm_data_prep.sh index 8eda00f1d15..e662759a610 100755 --- a/egs/ami/s5/local/ami_sdm_data_prep.sh +++ b/egs/ami/s5/local/ami_sdm_data_prep.sh @@ -74,7 +74,7 @@ awk '{print $1}' $dir/wav2.scp | join -2 2 - $dir/segments | \ awk '{print $1}' $dir/segments | join - $dir/text > $dir/t; mv $dir/t $dir/text #replace path with an appropriate sox command that select a single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # this file reco2file_and_channel maps recording-id cat $dir/wav.scp | \ diff --git a/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh b/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh index 01173d2e3a6..3fa7c938479 100755 --- a/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh +++ b/egs/ami/s5/local/ami_sdm_scoring_data_prep.sh @@ -72,7 +72,7 @@ sed -e 's?.*/??' -e 's?.wav??' $tmpdir/wav.flist | \ awk '{print $2}' $tmpdir/segments | sort -u | join - $tmpdir/wav1.scp > $tmpdir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp #prep reco2file_and_channel cat $tmpdir/wav.scp | \ diff --git a/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh b/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh index 24176d69a34..a6c2d02b7af 100755 --- a/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh +++ b/egs/ami/s5/local/online/run_nnet2_ms_perturbed.sh @@ -8,7 +8,7 @@ # This example script demonstrates how speed perturbation of the data helps the nnet training in the SWB setup. . ./cmd.sh -set -e +set -e stage=1 train_stage=-10 use_gpu=true @@ -27,13 +27,13 @@ fix_nnet=false if $use_gpu; then if ! cuda-compiled; then - cat < $dir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # (1d) reco2file_and_channel cat $dir/wav.scp \ diff --git a/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh index 3ae42afb3d8..746c42c4c1a 100755 --- a/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh +++ b/egs/ami/s5b/local/ami_ihm_scoring_data_prep.sh @@ -74,7 +74,7 @@ sed -e 's?.*/??' -e 's?.wav??' 
$dir/wav.flist | \ awk '{print $2}' $dir/segments | sort -u | join - $dir/wav1.scp > $dir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # (1d) reco2file_and_channel cat $dir/wav.scp \ diff --git a/egs/ami/s5b/local/ami_mdm_data_prep.sh b/egs/ami/s5b/local/ami_mdm_data_prep.sh index 0ab11c5893b..d100347a356 100755 --- a/egs/ami/s5b/local/ami_mdm_data_prep.sh +++ b/egs/ami/s5b/local/ami_mdm_data_prep.sh @@ -79,7 +79,7 @@ awk '{print $1}' $dir/wav2.scp | join -2 2 - $dir/segments | \ awk '{print $1}' $dir/segments | join - $dir/text > $dir/t; mv $dir/t $dir/text #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp #prep reco2file_and_channel cat $dir/wav.scp | \ diff --git a/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh index 4fbfe12ccad..65f514f223c 100755 --- a/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh +++ b/egs/ami/s5b/local/ami_mdm_scoring_data_prep.sh @@ -72,7 +72,7 @@ sed -e 's?.*/??' -e 's?.wav??' $tmpdir/wav.flist | \ awk '{print $2}' $tmpdir/segments | sort -u | join - $tmpdir/wav1.scp > $tmpdir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp #prep reco2file_and_channel cat $tmpdir/wav.scp | \ diff --git a/egs/ami/s5b/local/ami_sdm_data_prep.sh b/egs/ami/s5b/local/ami_sdm_data_prep.sh index 267aef75535..327595070a6 100755 --- a/egs/ami/s5b/local/ami_sdm_data_prep.sh +++ b/egs/ami/s5b/local/ami_sdm_data_prep.sh @@ -86,7 +86,7 @@ awk '{print $1}' $dir/wav2.scp | join -2 2 - $dir/segments | \ awk '{print $1}' $dir/segments | join - $dir/text > $dir/t; mv $dir/t $dir/text #replace path with an appropriate sox command that select a single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $dir/wav2.scp > $dir/wav.scp # this file reco2file_and_channel maps recording-id cat $dir/wav.scp | \ diff --git a/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh b/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh index d0609e552cd..1378f8b8965 100755 --- a/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh +++ b/egs/ami/s5b/local/ami_sdm_scoring_data_prep.sh @@ -82,7 +82,7 @@ sed -e 's?.*/??' -e 's?.wav??' 
$tmpdir/wav.flist | \ awk '{print $2}' $tmpdir/segments | sort -u | join - $tmpdir/wav1.scp > $tmpdir/wav2.scp #replace path with an appropriate sox command that select single channel only -awk '{print $1" sox -c 1 -t wavpcm -s "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp +awk '{print $1" sox -c 1 -t wavpcm -e signed-integer "$2" -t wavpcm - |"}' $tmpdir/wav2.scp > $tmpdir/wav.scp #prep reco2file_and_channel cat $tmpdir/wav.scp | \ diff --git a/egs/ami/s5b/local/chain/run_tdnn.sh b/egs/ami/s5b/local/chain/run_tdnn.sh index 61f8f499182..e1adaa9346d 120000 --- a/egs/ami/s5b/local/chain/run_tdnn.sh +++ b/egs/ami/s5b/local/chain/run_tdnn.sh @@ -1 +1 @@ -tuning/run_tdnn_1b.sh \ No newline at end of file +tuning/run_tdnn_1d.sh \ No newline at end of file diff --git a/egs/ami/s5b/local/chain/run_tdnn_lstm.sh b/egs/ami/s5b/local/chain/run_tdnn_lstm.sh index 43145248fbd..23906f31954 120000 --- a/egs/ami/s5b/local/chain/run_tdnn_lstm.sh +++ b/egs/ami/s5b/local/chain/run_tdnn_lstm.sh @@ -1 +1 @@ -tuning/run_tdnn_lstm_1i.sh \ No newline at end of file +tuning/run_tdnn_lstm_1j.sh \ No newline at end of file diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh new file mode 100755 index 00000000000..a9f228cb55d --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_1d.sh @@ -0,0 +1,269 @@ +#!/bin/bash + +# same as 1b but uses PCA instead of +# LDA features for the ivector extractor. + +# Results on 03/27/2017: +# local/chain/compare_wer_general.sh ihm tdnn1b_sp_bi tdnn1d_sp_bi +# System tdnn1b_sp_bi tdnn1d_sp_bi +# WER on dev 22.0 21.9 +# WER on eval 22.2 22.3 +# Final train prob -0.0813472 -0.0807054 +# Final valid prob -0.132032 -0.133564 +# Final train prob (xent) -1.41543 -1.41951 +# Final valid prob (xent) -1.62316 -1.63021 + +set -e -o pipefail +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +ivector_transform_type=pca +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1d #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn7 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn7 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + touch $dir/egs/.nodelete # keep egs around when that run dies. 
+ + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh new file mode 100755 index 00000000000..008060df070 --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1j.sh @@ -0,0 +1,306 @@ +#!/bin/bash + +# 1j is same as 1i but with changes related to fast-lstmp layer +# changed num-chunk-per-minibatch to be variable +# added extra_left_context_initial=0 +# and extra_right_context_final=0 +# These changes are similar to those between swbd's run_tdnn_lstm_1{c,d}.sh +# recipes + +# Results with flags : --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned \ +#System tdnn_lstm1i_sp_bi_ihmali_ld5 tdnn_lstm1j_sp_bi_ihmali_ld5 +#WER on dev 37.6 37.3 +#WER on eval 40.9 40.4 +#Final train prob -0.114135 -0.118532 +#Final valid prob -0.245208 -0.245593 +#Final train prob (xent) -1.47648 -1.48337 +#Final valid prob (xent) -2.16365 -2.11097 + +# steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/ exp/sdm1/chain_cleaned/tdnn_lstm1j_sp_bi_ihmali_ld5/ +# exp/sdm1/chain_cleaned/tdnn_lstm1i_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3770 combine=-0.142->-0.131 xent:train/valid[57,86,final]=(-1.78,-1.48,-1.48/-2.22,-2.17,-2.16) logprob:train/valid[57,86,final]=(-0.157,-0.117,-0.114/-0.243,-0.249,-0.245) +# exp/sdm1/chain_cleaned/tdnn_lstm1j_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3770 combine=-0.139->-0.130 xent:train/valid[57,86,final]=(-1.82,-1.50,-1.48/-2.18,-2.12,-2.11) logprob:train/valid[57,86,final]=(-0.165,-0.121,-0.119/-0.240,-0.247,-0.246) + +set -e -o pipefail + +# First the options that are passed through to 
run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1j #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn7 
input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh new file mode 100755 index 00000000000..b8d947d8e92 --- /dev/null +++ b/egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1k.sh @@ -0,0 +1,302 @@ +#!/bin/bash + +# 1k is same as 1j but with smaller delay on the first lstm layer +# there is a 37% increase in training time 11hrs vs 8hrs and the gains are modest + +# Results with flags : --mic sdm1 --use-ihm-ali true --train-set train_cleaned --gmm tri3_cleaned +#System tdnn_lstm1j_sp_bi_ihmali_ld5 tdnn_lstm1k_sp_bi_ihmali_ld5 +#WER on dev 37.3 36.9 +#WER on eval 40.4 40.0 +#Final train prob -0.118532 -0.119421 +#Final valid prob -0.245593 -0.24915 +#Final train prob (xent) -1.48337 -1.48024 +#Final valid prob (xent) -2.11097 -2.1196 + +#steps/info/chain_dir_info.pl exp/sdm1/chain_cleaned/tdnn_lstm1j_sp_bi_ihmali_ld5/ exp/sdm1/chain_cleaned/tdnn_lstm1k_sp_bi_ihmali_ld5 +# exp/sdm1/chain_cleaned/tdnn_lstm1j_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3770 combine=-0.139->-0.130 xent:train/valid[57,86,final]=(-1.82,-1.50,-1.48/-2.18,-2.12,-2.11) logprob:train/valid[57,86,final]=(-0.165,-0.121,-0.119/-0.240,-0.247,-0.246) +# exp/sdm1/chain_cleaned/tdnn_lstm1k_sp_bi_ihmali_ld5/: num-iters=87 nj=2..12 num-params=43.4M dim=40+100->3770 combine=-0.140->-0.130 xent:train/valid[57,86,final]=(-1.81,-1.49,-1.48/-2.19,-2.13,-2.12) logprob:train/valid[57,86,final]=(-0.163,-0.121,-0.119/-0.242,-0.249,-0.249) + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +mic=ihm +nj=30 +min_seg_len=1.55 +use_ihm_ali=false +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +ihm_gmm=tri3 # the gmm for the IHM system (if --use-ihm-ali true). +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +chunk_width=150 +chunk_left_context=40 +chunk_right_context=0 +label_delay=5 +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tlstm_affix=1k #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + + +# decode options +extra_left_context=50 +frames_per_chunk= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 13 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 14 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4200 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +xent_regularize=0.1 + +if [ $stage -le 15 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=lstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-1 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=lstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn8 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn9 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=lstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 16 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5b/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + +graph_dir=$dir/graph_${LM} +if [ $stage -le 17 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_${LM} $dir $graph_dir +fi + +if [ $stage -le 18 ]; then + rm $dir/.error 2>/dev/null || true + + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + + for decode_set in dev eval; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $nj --cmd "$decode_cmd" \ + --extra-left-context $extra_left_context \ + --frames-per-chunk "$frames_per_chunk" \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --online-ivector-dir exp/$mic/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/$mic/${decode_set}_hires $dir/decode_${decode_set} || exit 1; + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/ami/s5b/local/nnet3/run_ivector_common.sh b/egs/ami/s5b/local/nnet3/run_ivector_common.sh index bccbb42494c..860009c5ef5 100755 --- a/egs/ami/s5b/local/nnet3/run_ivector_common.sh +++ b/egs/ami/s5b/local/nnet3/run_ivector_common.sh @@ -17,8 +17,8 @@ train_set=train # you might set this to e.g. train_cleaned. gmm=tri3 # This specifies a GMM-dir from the features of the type you're training the system on; # it should contain alignments for 'train_set'. 
- num_threads_ubm=32 +ivector_transform_type=lda nnet3_affix=_cleaned # affix for exp/$mic/nnet3 directory to put iVector stuff in, so it # becomes exp/$mic/nnet3_cleaned or whatever. @@ -30,7 +30,7 @@ nnet3_affix=_cleaned # affix for exp/$mic/nnet3 directory to put iVector stu gmmdir=exp/${mic}/${gmm} -for f in data/${mic}/${train_set}/feats.scp ${gmmdir}/final.mdl; do +for f in data/${mic}/${train_set}/feats.scp ; do if [ ! -f $f ]; then echo "$0: expected file $f to exist" exit 1 @@ -110,20 +110,36 @@ if [ $stage -le 4 ]; then echo "$0: warning: number of feats $n1 != $n2, if these are very different it could be bad." fi - echo "$0: training a system on the hires data for its LDA+MLLT transform, in order to produce the diagonal GMM." - if [ -e exp/$mic/nnet3${nnet3_affix}/tri5/final.mdl ]; then - # we don't want to overwrite old stuff, ask the user to delete it. - echo "$0: exp/$mic/nnet3${nnet3_affix}/tri5/final.mdl already exists: " - echo " ... please delete and then rerun, or use a later --stage option." - exit 1; - fi - steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 7 --mllt-iters "2 4 6" \ - --splice-opts "--left-context=3 --right-context=3" \ - 3000 10000 $temp_data_root/${train_set}_hires data/lang \ - $gmmdir exp/$mic/nnet3${nnet3_affix}/tri5 + case $ivector_transform_type in + lda) + if [ ! -f ${gmmdir}/final.mdl ]; then + echo "$0: expected file ${gmmdir}/final.mdl to exist" + exit 1; + fi + echo "$0: training a system on the hires data for its LDA+MLLT transform, in order to produce the diagonal GMM." + if [ -e exp/$mic/nnet3${nnet3_affix}/tri5/final.mdl ]; then + # we don't want to overwrite old stuff, ask the user to delete it. + echo "$0: exp/$mic/nnet3${nnet3_affix}/tri5/final.mdl already exists: " + echo " ... please delete and then rerun, or use a later --stage option." + exit 1; + fi + steps/train_lda_mllt.sh --cmd "$train_cmd" --num-iters 7 --mllt-iters "2 4 6" \ + --splice-opts "--left-context=3 --right-context=3" \ + 3000 10000 $temp_data_root/${train_set}_hires data/lang \ + $gmmdir exp/$mic/nnet3${nnet3_affix}/tri5 + ;; + pca) + echo "$0: computing a PCA transform from the hires data." + steps/online/nnet2/get_pca_transform.sh --cmd "$train_cmd" \ + --splice-opts "--left-context=3 --right-context=3" \ + --max-utts 10000 --subsample 2 \ + $temp_data_root/${train_set}_hires \ + exp/$mic/nnet3${nnet3_affix}/tri5 + ;; + *) echo "$0: invalid iVector transform type $ivector_transform_type" && exit 1; + esac fi - if [ $stage -le 5 ]; then echo "$0: computing a subset of data to train the diagonal UBM." diff --git a/egs/aspire/s5/local/multi_condition/decode.sh b/egs/aspire/s5/local/multi_condition/decode.sh index 566524095a6..b09c4780e71 100755 --- a/egs/aspire/s5/local/multi_condition/decode.sh +++ b/egs/aspire/s5/local/multi_condition/decode.sh @@ -47,7 +47,7 @@ if [ $# -ne 3 ]; then echo " --iter # Iteration of model to decode; default is final." echo " --scoring-opts # options to local/score.sh" echo " --num-threads # number of threads to use, default 1." - echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4" + echo " --parallel-opts # e.g. 
'--num-threads 4' if you supply --num-threads 4" exit 1; fi diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh index 3b778b23162..4e34c78255a 100755 --- a/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh +++ b/egs/aspire/s5/local/multi_condition/run_nnet2_ms.sh @@ -28,7 +28,7 @@ If you want to use GPUs (and have them), go to src/, and configure and make on a where "nvcc" is installed. Otherwise, call this script with --use-gpu false EOF fi - parallel_opts="-l gpu=1" + parallel_opts="--gpu 1" num_threads=1 minibatch_size=512 @@ -47,7 +47,7 @@ else # almost the same, but this may be a little bit slow. num_threads=16 minibatch_size=128 - parallel_opts="-pe smp $num_threads" + parallel_opts="--num-threads $num_threads" fi # do the common parts of the script. diff --git a/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh b/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh index ad5fba0929f..dc285f28f8e 100755 --- a/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh +++ b/egs/aspire/s5/local/multi_condition/run_nnet2_ms_disc.sh @@ -8,7 +8,7 @@ # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, # since the lattice generation runs in about real-time, so takes of the order of # 1000 hours of CPU time. -# +# # Note: rather than using any features we have dumped on disk, this script # regenerates them from the wav data three times-- when we do lattice # generation, numerator alignment and discriminative training. This made the @@ -42,20 +42,20 @@ set -e if $use_gpu; then if ! cuda-compiled; then - cat <" + + + diff --git a/egs/babel/s5d/conf/lang/404-georgian.LLP.official.conf b/egs/babel/s5d/conf/lang/404-georgian.LLP.official.conf new file mode 100644 index 00000000000..570bcab68ec --- /dev/null +++ b/egs/babel/s5d/conf/lang/404-georgian.LLP.official.conf @@ -0,0 +1,54 @@ +# include common settings for fullLP systems. +. 
conf/common.limitedLP || exit 1; + + +#speech corpora files location +train_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/training +train_data_list=./conf/lists/404-georgian//sub-train.list +train_nj=32 + + +#Radical reduced DEV corpora files location +dev2h_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev +dev2h_data_list=./conf/lists/404-georgian//dev.2h.list +dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm +dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml +dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm +dev2h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist3.xml +) # dev2h_kwlists +dev2h_nj=16 +dev2h_subset_ecf=true + + +#Official DEV corpora files location +dev10h_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev +dev10h_data_list=./conf/lists/404-georgian//dev.list +dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm +dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml +dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm +dev10h_kwlists=( + [dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml + [eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist3.xml +) # dev10h_kwlists +dev10h_nj=32 + + +#Unsupervised dataset for LimitedLP condition +unsup_data_list=( + ./conf/lists/404-georgian//untranscribed-training.list + ./conf/lists/404-georgian//sub-train.untranscribed.list +) # unsup_data_list +unsup_data_dir=( + /export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/untranscribed-training + /export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/training +) # unsup_data_dir +unsup_nj=32 + + +lexicon_file= +lexiconFlags="--romanized --oov " + + + diff --git a/egs/babel/s5d/conf/lists/404-georgian/dev.2h.list b/egs/babel/s5d/conf/lists/404-georgian/dev.2h.list new file mode 100644 index 00000000000..a823552044c --- /dev/null +++ b/egs/babel/s5d/conf/lists/404-georgian/dev.2h.list @@ -0,0 +1,124 @@ +BABEL_OP3_404_10184_20141107_212406_inLine +BABEL_OP3_404_10184_20141107_212406_outLine +BABEL_OP3_404_12851_20141013_024620_inLine +BABEL_OP3_404_12851_20141013_024620_outLine +BABEL_OP3_404_16184_20141020_233508_inLine +BABEL_OP3_404_16184_20141020_233508_outLine +BABEL_OP3_404_17165_20141117_063008_inLine +BABEL_OP3_404_17165_20141117_063008_outLine +BABEL_OP3_404_17472_20141201_023731_inLine +BABEL_OP3_404_17472_20141201_023731_outLine +BABEL_OP3_404_18380_20141118_001754_inLine +BABEL_OP3_404_18380_20141118_001754_outLine +BABEL_OP3_404_18939_20141009_063127_inLine +BABEL_OP3_404_18939_20141009_063127_outLine +BABEL_OP3_404_22446_20141013_062554_inLine +BABEL_OP3_404_22446_20141013_062554_outLine 
+BABEL_OP3_404_22466_20141018_193129_inLine +BABEL_OP3_404_22466_20141018_193129_outLine +BABEL_OP3_404_22494_20141127_221208_inLine +BABEL_OP3_404_22494_20141127_221208_outLine +BABEL_OP3_404_22494_20141127_222057_inLine +BABEL_OP3_404_22494_20141127_222057_outLine +BABEL_OP3_404_23239_20141127_054155_inLine +BABEL_OP3_404_23239_20141127_054155_outLine +BABEL_OP3_404_24253_20150513_212152_inLine +BABEL_OP3_404_24253_20150513_212152_outLine +BABEL_OP3_404_24779_20150620_032949_inLine +BABEL_OP3_404_24779_20150620_032949_outLine +BABEL_OP3_404_26074_20141120_050650_inLine +BABEL_OP3_404_26074_20141120_050650_outLine +BABEL_OP3_404_28419_20141028_024104_inLine +BABEL_OP3_404_28419_20141028_024104_outLine +BABEL_OP3_404_33476_20141114_205102_inLine +BABEL_OP3_404_33476_20141114_205102_outLine +BABEL_OP3_404_34564_20141211_015413_inLine +BABEL_OP3_404_34564_20141211_015413_outLine +BABEL_OP3_404_35467_20141020_054030_inLine +BABEL_OP3_404_35467_20141020_054030_outLine +BABEL_OP3_404_38431_20141130_190122_inLine +BABEL_OP3_404_38431_20141130_190122_outLine +BABEL_OP3_404_41592_20141117_033328_inLine +BABEL_OP3_404_41592_20141117_033328_outLine +BABEL_OP3_404_41741_20141019_015552_inLine +BABEL_OP3_404_41741_20141019_015552_outLine +BABEL_OP3_404_42231_20141130_013425_inLine +BABEL_OP3_404_42231_20141130_013425_outLine +BABEL_OP3_404_42231_20141130_014628_inLine +BABEL_OP3_404_42231_20141130_014628_outLine +BABEL_OP3_404_42600_20141029_174857_inLine +BABEL_OP3_404_42600_20141029_174857_outLine +BABEL_OP3_404_44619_20141028_234639_inLine +BABEL_OP3_404_44619_20141028_234639_outLine +BABEL_OP3_404_46535_20150216_024618_inLine +BABEL_OP3_404_46535_20150216_024618_outLine +BABEL_OP3_404_46757_20141123_021510_inLine +BABEL_OP3_404_46757_20141123_021510_outLine +BABEL_OP3_404_47487_20141030_235808_inLine +BABEL_OP3_404_47487_20141030_235808_outLine +BABEL_OP3_404_47866_20150526_162411_inLine +BABEL_OP3_404_47866_20150526_162411_outLine +BABEL_OP3_404_47959_20141026_214447_inLine +BABEL_OP3_404_47959_20141026_214447_outLine +BABEL_OP3_404_51955_20141024_012212_inLine +BABEL_OP3_404_51955_20141024_012212_outLine +BABEL_OP3_404_51968_20141117_023015_inLine +BABEL_OP3_404_51968_20141117_023015_outLine +BABEL_OP3_404_52804_20141023_174815_inLine +BABEL_OP3_404_52804_20141023_174815_outLine +BABEL_OP3_404_54567_20141119_040337_inLine +BABEL_OP3_404_54567_20141119_040337_outLine +BABEL_OP3_404_56677_20141201_065523_inLine +BABEL_OP3_404_56677_20141201_065523_outLine +BABEL_OP3_404_56826_20141201_042429_inLine +BABEL_OP3_404_56826_20141201_042429_outLine +BABEL_OP3_404_58047_20141110_215330_inLine +BABEL_OP3_404_58047_20141110_215330_outLine +BABEL_OP3_404_58313_20141119_234202_inLine +BABEL_OP3_404_58313_20141119_234202_outLine +BABEL_OP3_404_59549_20141102_190355_inLine +BABEL_OP3_404_59549_20141102_190355_outLine +BABEL_OP3_404_60307_20150625_022621_inLine +BABEL_OP3_404_60307_20150625_022621_outLine +BABEL_OP3_404_61040_20141211_011552_inLine +BABEL_OP3_404_61040_20141211_011552_outLine +BABEL_OP3_404_61190_20141029_013447_inLine +BABEL_OP3_404_61190_20141029_013447_outLine +BABEL_OP3_404_64638_20141130_205157_inLine +BABEL_OP3_404_64638_20141130_205157_outLine +BABEL_OP3_404_66472_20141107_204602_inLine +BABEL_OP3_404_66472_20141107_204602_outLine +BABEL_OP3_404_66519_20141031_015751_inLine +BABEL_OP3_404_66519_20141031_015751_outLine +BABEL_OP3_404_67794_20141103_023323_inLine +BABEL_OP3_404_67794_20141103_023323_outLine +BABEL_OP3_404_73696_20150618_060036_inLine 
+BABEL_OP3_404_73696_20150618_060036_outLine +BABEL_OP3_404_73757_20141117_025704_inLine +BABEL_OP3_404_73757_20141117_025704_outLine +BABEL_OP3_404_74121_20141120_020705_inLine +BABEL_OP3_404_74121_20141120_020705_outLine +BABEL_OP3_404_80781_20141104_212234_inLine +BABEL_OP3_404_80781_20141104_212234_outLine +BABEL_OP3_404_80881_20141010_222135_inLine +BABEL_OP3_404_80881_20141010_222135_outLine +BABEL_OP3_404_81424_20141123_000421_inLine +BABEL_OP3_404_81424_20141123_000421_outLine +BABEL_OP3_404_87298_20141025_213601_inLine +BABEL_OP3_404_87298_20141025_213601_outLine +BABEL_OP3_404_87313_20141119_014632_inLine +BABEL_OP3_404_87313_20141119_014632_outLine +BABEL_OP3_404_87796_20141120_065537_inLine +BABEL_OP3_404_87796_20141120_065537_outLine +BABEL_OP3_404_87884_20141128_211555_inLine +BABEL_OP3_404_87884_20141128_211555_outLine +BABEL_OP3_404_88776_20141006_193621_inLine +BABEL_OP3_404_88776_20141006_193621_outLine +BABEL_OP3_404_91760_20150609_033824_inLine +BABEL_OP3_404_91760_20150609_033824_outLine +BABEL_OP3_404_91930_20150522_034521_inLine +BABEL_OP3_404_91930_20150522_034521_outLine +BABEL_OP3_404_92740_20141126_025242_inLine +BABEL_OP3_404_92740_20141126_025242_outLine +BABEL_OP3_404_97376_20141126_024552_inLine +BABEL_OP3_404_97376_20141126_024552_outLine diff --git a/egs/babel/s5d/conf/lists/404-georgian/dev.list b/egs/babel/s5d/conf/lists/404-georgian/dev.list new file mode 100644 index 00000000000..a823552044c --- /dev/null +++ b/egs/babel/s5d/conf/lists/404-georgian/dev.list @@ -0,0 +1,124 @@ +BABEL_OP3_404_10184_20141107_212406_inLine +BABEL_OP3_404_10184_20141107_212406_outLine +BABEL_OP3_404_12851_20141013_024620_inLine +BABEL_OP3_404_12851_20141013_024620_outLine +BABEL_OP3_404_16184_20141020_233508_inLine +BABEL_OP3_404_16184_20141020_233508_outLine +BABEL_OP3_404_17165_20141117_063008_inLine +BABEL_OP3_404_17165_20141117_063008_outLine +BABEL_OP3_404_17472_20141201_023731_inLine +BABEL_OP3_404_17472_20141201_023731_outLine +BABEL_OP3_404_18380_20141118_001754_inLine +BABEL_OP3_404_18380_20141118_001754_outLine +BABEL_OP3_404_18939_20141009_063127_inLine +BABEL_OP3_404_18939_20141009_063127_outLine +BABEL_OP3_404_22446_20141013_062554_inLine +BABEL_OP3_404_22446_20141013_062554_outLine +BABEL_OP3_404_22466_20141018_193129_inLine +BABEL_OP3_404_22466_20141018_193129_outLine +BABEL_OP3_404_22494_20141127_221208_inLine +BABEL_OP3_404_22494_20141127_221208_outLine +BABEL_OP3_404_22494_20141127_222057_inLine +BABEL_OP3_404_22494_20141127_222057_outLine +BABEL_OP3_404_23239_20141127_054155_inLine +BABEL_OP3_404_23239_20141127_054155_outLine +BABEL_OP3_404_24253_20150513_212152_inLine +BABEL_OP3_404_24253_20150513_212152_outLine +BABEL_OP3_404_24779_20150620_032949_inLine +BABEL_OP3_404_24779_20150620_032949_outLine +BABEL_OP3_404_26074_20141120_050650_inLine +BABEL_OP3_404_26074_20141120_050650_outLine +BABEL_OP3_404_28419_20141028_024104_inLine +BABEL_OP3_404_28419_20141028_024104_outLine +BABEL_OP3_404_33476_20141114_205102_inLine +BABEL_OP3_404_33476_20141114_205102_outLine +BABEL_OP3_404_34564_20141211_015413_inLine +BABEL_OP3_404_34564_20141211_015413_outLine +BABEL_OP3_404_35467_20141020_054030_inLine +BABEL_OP3_404_35467_20141020_054030_outLine +BABEL_OP3_404_38431_20141130_190122_inLine +BABEL_OP3_404_38431_20141130_190122_outLine +BABEL_OP3_404_41592_20141117_033328_inLine +BABEL_OP3_404_41592_20141117_033328_outLine +BABEL_OP3_404_41741_20141019_015552_inLine +BABEL_OP3_404_41741_20141019_015552_outLine +BABEL_OP3_404_42231_20141130_013425_inLine 
+BABEL_OP3_404_42231_20141130_013425_outLine +BABEL_OP3_404_42231_20141130_014628_inLine +BABEL_OP3_404_42231_20141130_014628_outLine +BABEL_OP3_404_42600_20141029_174857_inLine +BABEL_OP3_404_42600_20141029_174857_outLine +BABEL_OP3_404_44619_20141028_234639_inLine +BABEL_OP3_404_44619_20141028_234639_outLine +BABEL_OP3_404_46535_20150216_024618_inLine +BABEL_OP3_404_46535_20150216_024618_outLine +BABEL_OP3_404_46757_20141123_021510_inLine +BABEL_OP3_404_46757_20141123_021510_outLine +BABEL_OP3_404_47487_20141030_235808_inLine +BABEL_OP3_404_47487_20141030_235808_outLine +BABEL_OP3_404_47866_20150526_162411_inLine +BABEL_OP3_404_47866_20150526_162411_outLine +BABEL_OP3_404_47959_20141026_214447_inLine +BABEL_OP3_404_47959_20141026_214447_outLine +BABEL_OP3_404_51955_20141024_012212_inLine +BABEL_OP3_404_51955_20141024_012212_outLine +BABEL_OP3_404_51968_20141117_023015_inLine +BABEL_OP3_404_51968_20141117_023015_outLine +BABEL_OP3_404_52804_20141023_174815_inLine +BABEL_OP3_404_52804_20141023_174815_outLine +BABEL_OP3_404_54567_20141119_040337_inLine +BABEL_OP3_404_54567_20141119_040337_outLine +BABEL_OP3_404_56677_20141201_065523_inLine +BABEL_OP3_404_56677_20141201_065523_outLine +BABEL_OP3_404_56826_20141201_042429_inLine +BABEL_OP3_404_56826_20141201_042429_outLine +BABEL_OP3_404_58047_20141110_215330_inLine +BABEL_OP3_404_58047_20141110_215330_outLine +BABEL_OP3_404_58313_20141119_234202_inLine +BABEL_OP3_404_58313_20141119_234202_outLine +BABEL_OP3_404_59549_20141102_190355_inLine +BABEL_OP3_404_59549_20141102_190355_outLine +BABEL_OP3_404_60307_20150625_022621_inLine +BABEL_OP3_404_60307_20150625_022621_outLine +BABEL_OP3_404_61040_20141211_011552_inLine +BABEL_OP3_404_61040_20141211_011552_outLine +BABEL_OP3_404_61190_20141029_013447_inLine +BABEL_OP3_404_61190_20141029_013447_outLine +BABEL_OP3_404_64638_20141130_205157_inLine +BABEL_OP3_404_64638_20141130_205157_outLine +BABEL_OP3_404_66472_20141107_204602_inLine +BABEL_OP3_404_66472_20141107_204602_outLine +BABEL_OP3_404_66519_20141031_015751_inLine +BABEL_OP3_404_66519_20141031_015751_outLine +BABEL_OP3_404_67794_20141103_023323_inLine +BABEL_OP3_404_67794_20141103_023323_outLine +BABEL_OP3_404_73696_20150618_060036_inLine +BABEL_OP3_404_73696_20150618_060036_outLine +BABEL_OP3_404_73757_20141117_025704_inLine +BABEL_OP3_404_73757_20141117_025704_outLine +BABEL_OP3_404_74121_20141120_020705_inLine +BABEL_OP3_404_74121_20141120_020705_outLine +BABEL_OP3_404_80781_20141104_212234_inLine +BABEL_OP3_404_80781_20141104_212234_outLine +BABEL_OP3_404_80881_20141010_222135_inLine +BABEL_OP3_404_80881_20141010_222135_outLine +BABEL_OP3_404_81424_20141123_000421_inLine +BABEL_OP3_404_81424_20141123_000421_outLine +BABEL_OP3_404_87298_20141025_213601_inLine +BABEL_OP3_404_87298_20141025_213601_outLine +BABEL_OP3_404_87313_20141119_014632_inLine +BABEL_OP3_404_87313_20141119_014632_outLine +BABEL_OP3_404_87796_20141120_065537_inLine +BABEL_OP3_404_87796_20141120_065537_outLine +BABEL_OP3_404_87884_20141128_211555_inLine +BABEL_OP3_404_87884_20141128_211555_outLine +BABEL_OP3_404_88776_20141006_193621_inLine +BABEL_OP3_404_88776_20141006_193621_outLine +BABEL_OP3_404_91760_20150609_033824_inLine +BABEL_OP3_404_91760_20150609_033824_outLine +BABEL_OP3_404_91930_20150522_034521_inLine +BABEL_OP3_404_91930_20150522_034521_outLine +BABEL_OP3_404_92740_20141126_025242_inLine +BABEL_OP3_404_92740_20141126_025242_outLine +BABEL_OP3_404_97376_20141126_024552_inLine +BABEL_OP3_404_97376_20141126_024552_outLine diff --git 
a/egs/babel/s5d/conf/lists/404-georgian/eval.list b/egs/babel/s5d/conf/lists/404-georgian/eval.list new file mode 100644 index 00000000000..d197b90ee2f --- /dev/null +++ b/egs/babel/s5d/conf/lists/404-georgian/eval.list @@ -0,0 +1,956 @@ +BABEL_OP3_404_10036_20141030_200515_inLine +BABEL_OP3_404_10036_20141030_200515_outLine +BABEL_OP3_404_10188_20141021_043537_inLine +BABEL_OP3_404_10188_20141021_043537_outLine +BABEL_OP3_404_10319_20141015_010220_inLine +BABEL_OP3_404_10319_20141015_010220_outLine +BABEL_OP3_404_10319_20141015_011118_inLine +BABEL_OP3_404_10319_20141015_011118_outLine +BABEL_OP3_404_10482_20141130_013900_inLine +BABEL_OP3_404_10482_20141130_013900_outLine +BABEL_OP3_404_10524_20150518_002415_inLine +BABEL_OP3_404_10524_20150518_002415_outLine +BABEL_OP3_404_10901_20141120_172058_inLine +BABEL_OP3_404_10901_20141120_172058_outLine +BABEL_OP3_404_10966_20141027_000701_inLine +BABEL_OP3_404_10966_20141027_000701_outLine +BABEL_OP3_404_11419_20150212_050835_inLine +BABEL_OP3_404_11419_20150212_050835_outLine +BABEL_OP3_404_11419_20150212_051550_inLine +BABEL_OP3_404_11419_20150212_051550_outLine +BABEL_OP3_404_11581_20141110_223927_inLine +BABEL_OP3_404_11581_20141110_223927_outLine +BABEL_OP3_404_11797_20141019_195244_inLine +BABEL_OP3_404_11797_20141019_195244_outLine +BABEL_OP3_404_12321_20141211_055837_inLine +BABEL_OP3_404_12321_20141211_055837_outLine +BABEL_OP3_404_13040_20141024_004921_inLine +BABEL_OP3_404_13040_20141024_004921_outLine +BABEL_OP3_404_13427_20141107_220103_inLine +BABEL_OP3_404_13427_20141107_220103_outLine +BABEL_OP3_404_13483_20141128_002800_inLine +BABEL_OP3_404_13483_20141128_002800_outLine +BABEL_OP3_404_13490_20141118_023408_inLine +BABEL_OP3_404_13490_20141118_023408_outLine +BABEL_OP3_404_13561_20141115_003843_inLine +BABEL_OP3_404_13561_20141115_003843_outLine +BABEL_OP3_404_13586_20141106_180057_inLine +BABEL_OP3_404_13586_20141106_180057_outLine +BABEL_OP3_404_13744_20141021_043037_inLine +BABEL_OP3_404_13744_20141021_043037_outLine +BABEL_OP3_404_13792_20141011_010111_inLine +BABEL_OP3_404_13792_20141011_010111_outLine +BABEL_OP3_404_14097_20150211_010746_inLine +BABEL_OP3_404_14097_20150211_010746_outLine +BABEL_OP3_404_14179_20141201_063636_inLine +BABEL_OP3_404_14179_20141201_063636_outLine +BABEL_OP3_404_14228_20141130_062059_inLine +BABEL_OP3_404_14228_20141130_062059_outLine +BABEL_OP3_404_14560_20141201_073709_inLine +BABEL_OP3_404_14560_20141201_073709_outLine +BABEL_OP3_404_14719_20141201_014614_inLine +BABEL_OP3_404_14719_20141201_014614_outLine +BABEL_OP3_404_14725_20141013_005356_inLine +BABEL_OP3_404_14725_20141013_005356_outLine +BABEL_OP3_404_15163_20141115_035641_inLine +BABEL_OP3_404_15163_20141115_035641_outLine +BABEL_OP3_404_15322_20150512_231817_inLine +BABEL_OP3_404_15322_20150512_231817_outLine +BABEL_OP3_404_15324_20141120_031528_inLine +BABEL_OP3_404_15324_20141120_031528_outLine +BABEL_OP3_404_15702_20141129_051812_inLine +BABEL_OP3_404_15702_20141129_051812_outLine +BABEL_OP3_404_15730_20141021_055606_inLine +BABEL_OP3_404_15730_20141021_055606_outLine +BABEL_OP3_404_15926_20141124_004339_inLine +BABEL_OP3_404_15926_20141124_004339_outLine +BABEL_OP3_404_15926_20141124_005513_inLine +BABEL_OP3_404_15926_20141124_005513_outLine +BABEL_OP3_404_16056_20141009_005123_inLine +BABEL_OP3_404_16056_20141009_005123_outLine +BABEL_OP3_404_16787_20141120_174312_inLine +BABEL_OP3_404_16787_20141120_174312_outLine +BABEL_OP3_404_16800_20141212_184132_inLine +BABEL_OP3_404_16800_20141212_184132_outLine 
+BABEL_OP3_404_16800_20141212_185849_inLine +BABEL_OP3_404_16800_20141212_185849_outLine +BABEL_OP3_404_16886_20141117_002313_inLine +BABEL_OP3_404_16886_20141117_002313_outLine +BABEL_OP3_404_16886_20141117_003801_inLine +BABEL_OP3_404_16886_20141117_003801_outLine +BABEL_OP3_404_16924_20141201_020122_inLine +BABEL_OP3_404_16924_20141201_020122_outLine +BABEL_OP3_404_16938_20141118_045730_inLine +BABEL_OP3_404_16938_20141118_045730_outLine +BABEL_OP3_404_17032_20141128_030249_inLine +BABEL_OP3_404_17032_20141128_030249_outLine +BABEL_OP3_404_17440_20141127_041844_inLine +BABEL_OP3_404_17440_20141127_041844_outLine +BABEL_OP3_404_17496_20141130_022805_inLine +BABEL_OP3_404_17496_20141130_022805_outLine +BABEL_OP3_404_17751_20150611_030539_inLine +BABEL_OP3_404_17751_20150611_030539_outLine +BABEL_OP3_404_17881_20150524_231317_inLine +BABEL_OP3_404_17881_20150524_231317_outLine +BABEL_OP3_404_17914_20150526_054931_inLine +BABEL_OP3_404_17914_20150526_054931_outLine +BABEL_OP3_404_18280_20150213_011322_inLine +BABEL_OP3_404_18280_20150213_011322_outLine +BABEL_OP3_404_18370_20150210_194727_inLine +BABEL_OP3_404_18370_20150210_194727_outLine +BABEL_OP3_404_18924_20141110_211055_inLine +BABEL_OP3_404_18924_20141110_211055_outLine +BABEL_OP3_404_19101_20141113_042102_inLine +BABEL_OP3_404_19101_20141113_042102_outLine +BABEL_OP3_404_19545_20141107_223152_inLine +BABEL_OP3_404_19545_20141107_223152_outLine +BABEL_OP3_404_19621_20141201_041129_inLine +BABEL_OP3_404_19621_20141201_041129_outLine +BABEL_OP3_404_19672_20141124_015046_inLine +BABEL_OP3_404_19672_20141124_015046_outLine +BABEL_OP3_404_19722_20141006_033717_inLine +BABEL_OP3_404_19722_20141006_033717_outLine +BABEL_OP3_404_19782_20141201_231608_inLine +BABEL_OP3_404_19782_20141201_231608_outLine +BABEL_OP3_404_19818_20141124_044516_inLine +BABEL_OP3_404_19818_20141124_044516_outLine +BABEL_OP3_404_20367_20150618_055644_inLine +BABEL_OP3_404_20367_20150618_055644_outLine +BABEL_OP3_404_20682_20141211_044056_inLine +BABEL_OP3_404_20682_20141211_044056_outLine +BABEL_OP3_404_20682_20141211_045257_inLine +BABEL_OP3_404_20682_20141211_045257_outLine +BABEL_OP3_404_20738_20150503_191409_inLine +BABEL_OP3_404_20738_20150503_191409_outLine +BABEL_OP3_404_20768_20141207_081305_inLine +BABEL_OP3_404_20768_20141207_081305_outLine +BABEL_OP3_404_20800_20141022_192312_inLine +BABEL_OP3_404_20800_20141022_192312_outLine +BABEL_OP3_404_20916_20141006_192451_inLine +BABEL_OP3_404_20916_20141006_192451_outLine +BABEL_OP3_404_21029_20141105_033902_inLine +BABEL_OP3_404_21029_20141105_033902_outLine +BABEL_OP3_404_21206_20141024_194128_inLine +BABEL_OP3_404_21206_20141024_194128_outLine +BABEL_OP3_404_21624_20150525_034841_inLine +BABEL_OP3_404_21624_20150525_034841_outLine +BABEL_OP3_404_21794_20141115_220258_inLine +BABEL_OP3_404_21794_20141115_220258_outLine +BABEL_OP3_404_22021_20150217_213437_inLine +BABEL_OP3_404_22021_20150217_213437_outLine +BABEL_OP3_404_22021_20150220_194248_inLine +BABEL_OP3_404_22021_20150220_194248_outLine +BABEL_OP3_404_22034_20150211_165126_inLine +BABEL_OP3_404_22034_20150211_165126_outLine +BABEL_OP3_404_22170_20150528_002541_inLine +BABEL_OP3_404_22170_20150528_002541_outLine +BABEL_OP3_404_22216_20141020_051333_inLine +BABEL_OP3_404_22216_20141020_051333_outLine +BABEL_OP3_404_22321_20141019_214812_inLine +BABEL_OP3_404_22321_20141019_214812_outLine +BABEL_OP3_404_22612_20141201_080517_inLine +BABEL_OP3_404_22612_20141201_080517_outLine +BABEL_OP3_404_22641_20141021_165119_inLine 
+BABEL_OP3_404_22641_20141021_165119_outLine +BABEL_OP3_404_22965_20141101_192617_inLine +BABEL_OP3_404_22965_20141101_192617_outLine +BABEL_OP3_404_23006_20141026_211155_inLine +BABEL_OP3_404_23006_20141026_211155_outLine +BABEL_OP3_404_23092_20141129_005335_inLine +BABEL_OP3_404_23092_20141129_005335_outLine +BABEL_OP3_404_23153_20141118_015224_inLine +BABEL_OP3_404_23153_20141118_015224_outLine +BABEL_OP3_404_23628_20141027_170345_inLine +BABEL_OP3_404_23628_20141027_170345_outLine +BABEL_OP3_404_24017_20141211_021947_inLine +BABEL_OP3_404_24017_20141211_021947_outLine +BABEL_OP3_404_24290_20150515_164252_inLine +BABEL_OP3_404_24290_20150515_164252_outLine +BABEL_OP3_404_24569_20141130_214924_inLine +BABEL_OP3_404_24569_20141130_214924_outLine +BABEL_OP3_404_24605_20141013_043620_inLine +BABEL_OP3_404_24605_20141013_043620_outLine +BABEL_OP3_404_25698_20150611_021501_inLine +BABEL_OP3_404_25698_20150611_021501_outLine +BABEL_OP3_404_25767_20141009_211814_inLine +BABEL_OP3_404_25767_20141009_211814_outLine +BABEL_OP3_404_26206_20141128_031139_inLine +BABEL_OP3_404_26206_20141128_031139_outLine +BABEL_OP3_404_26999_20141130_004320_inLine +BABEL_OP3_404_26999_20141130_004320_outLine +BABEL_OP3_404_27082_20141119_041436_inLine +BABEL_OP3_404_27082_20141119_041436_outLine +BABEL_OP3_404_27125_20141007_032335_inLine +BABEL_OP3_404_27125_20141007_032335_outLine +BABEL_OP3_404_27478_20150514_205232_inLine +BABEL_OP3_404_27478_20150514_205232_outLine +BABEL_OP3_404_28422_20141124_055809_inLine +BABEL_OP3_404_28422_20141124_055809_outLine +BABEL_OP3_404_28606_20141127_011719_inLine +BABEL_OP3_404_28606_20141127_011719_outLine +BABEL_OP3_404_28775_20141028_193907_inLine +BABEL_OP3_404_28775_20141028_193907_outLine +BABEL_OP3_404_29023_20141024_225827_inLine +BABEL_OP3_404_29023_20141024_225827_outLine +BABEL_OP3_404_29072_20141128_023212_inLine +BABEL_OP3_404_29072_20141128_023212_outLine +BABEL_OP3_404_29135_20141022_182050_inLine +BABEL_OP3_404_29135_20141022_182050_outLine +BABEL_OP3_404_29168_20141023_013832_inLine +BABEL_OP3_404_29168_20141023_013832_outLine +BABEL_OP3_404_29352_20150618_035033_inLine +BABEL_OP3_404_29352_20150618_035033_outLine +BABEL_OP3_404_29352_20150618_041025_inLine +BABEL_OP3_404_29352_20150618_041025_outLine +BABEL_OP3_404_29685_20141103_223309_inLine +BABEL_OP3_404_29685_20141103_223309_outLine +BABEL_OP3_404_29765_20150616_155830_inLine +BABEL_OP3_404_29765_20150616_155830_outLine +BABEL_OP3_404_30013_20141127_211853_inLine +BABEL_OP3_404_30013_20141127_211853_outLine +BABEL_OP3_404_30058_20150514_024957_inLine +BABEL_OP3_404_30058_20150514_024957_outLine +BABEL_OP3_404_30180_20141118_011806_inLine +BABEL_OP3_404_30180_20141118_011806_outLine +BABEL_OP3_404_30253_20141201_051926_inLine +BABEL_OP3_404_30253_20141201_051926_outLine +BABEL_OP3_404_30395_20141106_185545_inLine +BABEL_OP3_404_30395_20141106_185545_outLine +BABEL_OP3_404_31039_20150217_050120_inLine +BABEL_OP3_404_31039_20150217_050120_outLine +BABEL_OP3_404_31039_20150217_051317_inLine +BABEL_OP3_404_31039_20150217_051317_outLine +BABEL_OP3_404_31074_20150121_022649_inLine +BABEL_OP3_404_31074_20150121_022649_outLine +BABEL_OP3_404_31184_20141118_183536_inLine +BABEL_OP3_404_31184_20141118_183536_outLine +BABEL_OP3_404_31490_20141022_200135_inLine +BABEL_OP3_404_31490_20141022_200135_outLine +BABEL_OP3_404_31583_20141130_004731_inLine +BABEL_OP3_404_31583_20141130_004731_outLine +BABEL_OP3_404_31628_20141202_000346_inLine +BABEL_OP3_404_31628_20141202_000346_outLine 
+BABEL_OP3_404_32097_20141006_221638_inLine +BABEL_OP3_404_32097_20141006_221638_outLine +BABEL_OP3_404_32244_20150609_043200_inLine +BABEL_OP3_404_32244_20150609_043200_outLine +BABEL_OP3_404_32301_20141126_204138_inLine +BABEL_OP3_404_32301_20141126_204138_outLine +BABEL_OP3_404_33111_20150528_004829_inLine +BABEL_OP3_404_33111_20150528_004829_outLine +BABEL_OP3_404_33251_20141119_205146_inLine +BABEL_OP3_404_33251_20141119_205146_outLine +BABEL_OP3_404_33273_20141105_213401_inLine +BABEL_OP3_404_33273_20141105_213401_outLine +BABEL_OP3_404_33497_20141119_051436_inLine +BABEL_OP3_404_33497_20141119_051436_outLine +BABEL_OP3_404_33635_20141106_005750_inLine +BABEL_OP3_404_33635_20141106_005750_outLine +BABEL_OP3_404_33672_20141014_004055_inLine +BABEL_OP3_404_33672_20141014_004055_outLine +BABEL_OP3_404_33672_20141014_005233_inLine +BABEL_OP3_404_33672_20141014_005233_outLine +BABEL_OP3_404_33951_20141119_072531_inLine +BABEL_OP3_404_33951_20141119_072531_outLine +BABEL_OP3_404_34197_20141018_201528_inLine +BABEL_OP3_404_34197_20141018_201528_outLine +BABEL_OP3_404_34336_20141027_211535_inLine +BABEL_OP3_404_34336_20141027_211535_outLine +BABEL_OP3_404_34477_20141027_184645_inLine +BABEL_OP3_404_34477_20141027_184645_outLine +BABEL_OP3_404_34903_20141124_020719_inLine +BABEL_OP3_404_34903_20141124_020719_outLine +BABEL_OP3_404_35139_20141023_224322_inLine +BABEL_OP3_404_35139_20141023_224322_outLine +BABEL_OP3_404_35202_20141128_053756_inLine +BABEL_OP3_404_35202_20141128_053756_outLine +BABEL_OP3_404_35885_20150518_015426_inLine +BABEL_OP3_404_35885_20150518_015426_outLine +BABEL_OP3_404_36293_20141006_004659_inLine +BABEL_OP3_404_36293_20141006_004659_outLine +BABEL_OP3_404_36341_20141021_045218_inLine +BABEL_OP3_404_36341_20141021_045218_outLine +BABEL_OP3_404_36669_20141116_050542_inLine +BABEL_OP3_404_36669_20141116_050542_outLine +BABEL_OP3_404_36894_20141009_013557_inLine +BABEL_OP3_404_36894_20141009_013557_outLine +BABEL_OP3_404_36990_20141117_041052_inLine +BABEL_OP3_404_36990_20141117_041052_outLine +BABEL_OP3_404_37068_20150212_050250_inLine +BABEL_OP3_404_37068_20150212_050250_outLine +BABEL_OP3_404_37285_20141128_060822_inLine +BABEL_OP3_404_37285_20141128_060822_outLine +BABEL_OP3_404_37684_20150211_031551_inLine +BABEL_OP3_404_37684_20150211_031551_outLine +BABEL_OP3_404_38076_20141129_030136_inLine +BABEL_OP3_404_38076_20141129_030136_outLine +BABEL_OP3_404_38689_20141128_235841_inLine +BABEL_OP3_404_38689_20141128_235841_outLine +BABEL_OP3_404_38741_20141028_190310_inLine +BABEL_OP3_404_38741_20141028_190310_outLine +BABEL_OP3_404_38750_20141130_052516_inLine +BABEL_OP3_404_38750_20141130_052516_outLine +BABEL_OP3_404_38878_20141118_224023_inLine +BABEL_OP3_404_38878_20141118_224023_outLine +BABEL_OP3_404_39006_20150617_032943_inLine +BABEL_OP3_404_39006_20150617_032943_outLine +BABEL_OP3_404_39159_20141021_033733_inLine +BABEL_OP3_404_39159_20141021_033733_outLine +BABEL_OP3_404_39848_20141113_234103_inLine +BABEL_OP3_404_39848_20141113_234103_outLine +BABEL_OP3_404_40565_20141126_191549_inLine +BABEL_OP3_404_40565_20141126_191549_outLine +BABEL_OP3_404_41038_20141201_070557_inLine +BABEL_OP3_404_41038_20141201_070557_outLine +BABEL_OP3_404_41174_20141117_033354_inLine +BABEL_OP3_404_41174_20141117_033354_outLine +BABEL_OP3_404_41442_20141201_065524_inLine +BABEL_OP3_404_41442_20141201_065524_outLine +BABEL_OP3_404_41469_20141015_041032_inLine +BABEL_OP3_404_41469_20141015_041032_outLine +BABEL_OP3_404_41493_20141007_192601_inLine 
+BABEL_OP3_404_41493_20141007_192601_outLine +BABEL_OP3_404_41618_20141114_232533_inLine +BABEL_OP3_404_41618_20141114_232533_outLine +BABEL_OP3_404_41890_20150516_214915_inLine +BABEL_OP3_404_41890_20150516_214915_outLine +BABEL_OP3_404_42146_20150524_225524_inLine +BABEL_OP3_404_42146_20150524_225524_outLine +BABEL_OP3_404_42434_20141101_015900_inLine +BABEL_OP3_404_42434_20141101_015900_outLine +BABEL_OP3_404_42718_20150514_042601_inLine +BABEL_OP3_404_42718_20150514_042601_outLine +BABEL_OP3_404_42771_20141119_032738_inLine +BABEL_OP3_404_42771_20141119_032738_outLine +BABEL_OP3_404_42942_20141105_231330_inLine +BABEL_OP3_404_42942_20141105_231330_outLine +BABEL_OP3_404_42991_20141201_174138_inLine +BABEL_OP3_404_42991_20141201_174138_outLine +BABEL_OP3_404_43115_20150518_051249_inLine +BABEL_OP3_404_43115_20150518_051249_outLine +BABEL_OP3_404_43285_20141127_224948_inLine +BABEL_OP3_404_43285_20141127_224948_outLine +BABEL_OP3_404_43286_20141011_233252_inLine +BABEL_OP3_404_43286_20141011_233252_outLine +BABEL_OP3_404_43646_20141011_031534_inLine +BABEL_OP3_404_43646_20141011_031534_outLine +BABEL_OP3_404_43784_20141101_215816_inLine +BABEL_OP3_404_43784_20141101_215816_outLine +BABEL_OP3_404_43784_20141101_220445_inLine +BABEL_OP3_404_43784_20141101_220445_outLine +BABEL_OP3_404_43784_20141101_222312_inLine +BABEL_OP3_404_43784_20141101_222312_outLine +BABEL_OP3_404_43788_20141125_190621_inLine +BABEL_OP3_404_43788_20141125_190621_outLine +BABEL_OP3_404_43920_20141128_232903_inLine +BABEL_OP3_404_43920_20141128_232903_outLine +BABEL_OP3_404_44255_20150525_073716_inLine +BABEL_OP3_404_44255_20150525_073716_outLine +BABEL_OP3_404_44420_20141025_211032_inLine +BABEL_OP3_404_44420_20141025_211032_outLine +BABEL_OP3_404_44531_20150527_015805_inLine +BABEL_OP3_404_44531_20150527_015805_outLine +BABEL_OP3_404_44709_20141126_024811_inLine +BABEL_OP3_404_44709_20141126_024811_outLine +BABEL_OP3_404_44868_20141123_032254_inLine +BABEL_OP3_404_44868_20141123_032254_outLine +BABEL_OP3_404_45642_20141011_233950_inLine +BABEL_OP3_404_45642_20141011_233950_outLine +BABEL_OP3_404_45770_20141009_185730_inLine +BABEL_OP3_404_45770_20141009_185730_outLine +BABEL_OP3_404_45777_20141028_195713_inLine +BABEL_OP3_404_45777_20141028_195713_outLine +BABEL_OP3_404_45843_20141124_042608_inLine +BABEL_OP3_404_45843_20141124_042608_outLine +BABEL_OP3_404_46008_20150525_024936_inLine +BABEL_OP3_404_46008_20150525_024936_outLine +BABEL_OP3_404_46261_20141117_200301_inLine +BABEL_OP3_404_46261_20141117_200301_outLine +BABEL_OP3_404_46389_20150216_043700_inLine +BABEL_OP3_404_46389_20150216_043700_outLine +BABEL_OP3_404_46558_20141020_013256_inLine +BABEL_OP3_404_46558_20141020_013256_outLine +BABEL_OP3_404_46589_20141126_010932_inLine +BABEL_OP3_404_46589_20141126_010932_outLine +BABEL_OP3_404_46702_20141021_004925_inLine +BABEL_OP3_404_46702_20141021_004925_outLine +BABEL_OP3_404_47110_20150211_041423_inLine +BABEL_OP3_404_47110_20150211_041423_outLine +BABEL_OP3_404_47186_20141130_032126_inLine +BABEL_OP3_404_47186_20141130_032126_outLine +BABEL_OP3_404_47215_20141016_012848_inLine +BABEL_OP3_404_47215_20141016_012848_outLine +BABEL_OP3_404_47283_20141105_063730_inLine +BABEL_OP3_404_47283_20141105_063730_outLine +BABEL_OP3_404_47451_20141201_044107_inLine +BABEL_OP3_404_47451_20141201_044107_outLine +BABEL_OP3_404_47451_20141201_045923_inLine +BABEL_OP3_404_47451_20141201_045923_outLine +BABEL_OP3_404_47878_20141115_030044_inLine +BABEL_OP3_404_47878_20141115_030044_outLine 
+BABEL_OP3_404_48789_20141130_013950_inLine +BABEL_OP3_404_48789_20141130_013950_outLine +BABEL_OP3_404_49001_20141102_054949_inLine +BABEL_OP3_404_49001_20141102_054949_outLine +BABEL_OP3_404_49216_20141023_021720_inLine +BABEL_OP3_404_49216_20141023_021720_outLine +BABEL_OP3_404_49287_20141201_003931_inLine +BABEL_OP3_404_49287_20141201_003931_outLine +BABEL_OP3_404_49502_20141012_055001_inLine +BABEL_OP3_404_49502_20141012_055001_outLine +BABEL_OP3_404_49637_20141006_052951_inLine +BABEL_OP3_404_49637_20141006_052951_outLine +BABEL_OP3_404_50090_20141119_215921_inLine +BABEL_OP3_404_50090_20141119_215921_outLine +BABEL_OP3_404_50427_20141108_184045_inLine +BABEL_OP3_404_50427_20141108_184045_outLine +BABEL_OP3_404_50630_20141123_224108_inLine +BABEL_OP3_404_50630_20141123_224108_outLine +BABEL_OP3_404_50681_20141119_074034_inLine +BABEL_OP3_404_50681_20141119_074034_outLine +BABEL_OP3_404_50726_20141021_005526_inLine +BABEL_OP3_404_50726_20141021_005526_outLine +BABEL_OP3_404_50958_20141118_184358_inLine +BABEL_OP3_404_50958_20141118_184358_outLine +BABEL_OP3_404_50958_20141118_185604_inLine +BABEL_OP3_404_50958_20141118_185604_outLine +BABEL_OP3_404_50962_20141107_060744_inLine +BABEL_OP3_404_50962_20141107_060744_outLine +BABEL_OP3_404_51407_20141117_062029_inLine +BABEL_OP3_404_51407_20141117_062029_outLine +BABEL_OP3_404_51611_20141022_024919_inLine +BABEL_OP3_404_51611_20141022_024919_outLine +BABEL_OP3_404_51819_20141126_211917_inLine +BABEL_OP3_404_51819_20141126_211917_outLine +BABEL_OP3_404_52272_20141006_031940_inLine +BABEL_OP3_404_52272_20141006_031940_outLine +BABEL_OP3_404_52438_20141104_034612_inLine +BABEL_OP3_404_52438_20141104_034612_outLine +BABEL_OP3_404_52442_20141109_004908_inLine +BABEL_OP3_404_52442_20141109_004908_outLine +BABEL_OP3_404_52614_20150503_200805_inLine +BABEL_OP3_404_52614_20150503_200805_outLine +BABEL_OP3_404_52694_20141121_043410_inLine +BABEL_OP3_404_52694_20141121_043410_outLine +BABEL_OP3_404_52717_20141014_234034_inLine +BABEL_OP3_404_52717_20141014_234034_outLine +BABEL_OP3_404_52818_20141130_231525_inLine +BABEL_OP3_404_52818_20141130_231525_outLine +BABEL_OP3_404_52932_20141101_234724_inLine +BABEL_OP3_404_52932_20141101_234724_outLine +BABEL_OP3_404_53419_20141201_030819_inLine +BABEL_OP3_404_53419_20141201_030819_outLine +BABEL_OP3_404_53842_20141119_044935_inLine +BABEL_OP3_404_53842_20141119_044935_outLine +BABEL_OP3_404_54074_20141129_060147_inLine +BABEL_OP3_404_54074_20141129_060147_outLine +BABEL_OP3_404_54162_20141119_032442_inLine +BABEL_OP3_404_54162_20141119_032442_outLine +BABEL_OP3_404_54390_20141028_230702_inLine +BABEL_OP3_404_54390_20141028_230702_outLine +BABEL_OP3_404_54530_20141130_011651_inLine +BABEL_OP3_404_54530_20141130_011651_outLine +BABEL_OP3_404_54697_20141201_053854_inLine +BABEL_OP3_404_54697_20141201_053854_outLine +BABEL_OP3_404_54953_20141115_022411_inLine +BABEL_OP3_404_54953_20141115_022411_outLine +BABEL_OP3_404_55742_20141102_071943_inLine +BABEL_OP3_404_55742_20141102_071943_outLine +BABEL_OP3_404_55818_20141014_062259_inLine +BABEL_OP3_404_55818_20141014_062259_outLine +BABEL_OP3_404_55950_20150502_234657_inLine +BABEL_OP3_404_55950_20150502_234657_outLine +BABEL_OP3_404_55968_20141009_231223_inLine +BABEL_OP3_404_55968_20141009_231223_outLine +BABEL_OP3_404_56090_20141019_172050_inLine +BABEL_OP3_404_56090_20141019_172050_outLine +BABEL_OP3_404_56198_20141103_031752_inLine +BABEL_OP3_404_56198_20141103_031752_outLine +BABEL_OP3_404_56307_20141201_210608_inLine 
+BABEL_OP3_404_56307_20141201_210608_outLine +BABEL_OP3_404_56370_20141010_013542_inLine +BABEL_OP3_404_56370_20141010_013542_outLine +BABEL_OP3_404_56429_20141024_003551_inLine +BABEL_OP3_404_56429_20141024_003551_outLine +BABEL_OP3_404_56523_20141114_215534_inLine +BABEL_OP3_404_56523_20141114_215534_outLine +BABEL_OP3_404_56720_20141129_182808_inLine +BABEL_OP3_404_56720_20141129_182808_outLine +BABEL_OP3_404_56720_20141129_183649_inLine +BABEL_OP3_404_56720_20141129_183649_outLine +BABEL_OP3_404_57093_20141118_034107_inLine +BABEL_OP3_404_57093_20141118_034107_outLine +BABEL_OP3_404_57116_20141008_023139_inLine +BABEL_OP3_404_57116_20141008_023139_outLine +BABEL_OP3_404_57529_20141201_050129_inLine +BABEL_OP3_404_57529_20141201_050129_outLine +BABEL_OP3_404_57548_20141119_194430_inLine +BABEL_OP3_404_57548_20141119_194430_outLine +BABEL_OP3_404_57609_20141117_063904_inLine +BABEL_OP3_404_57609_20141117_063904_outLine +BABEL_OP3_404_57609_20141119_223552_inLine +BABEL_OP3_404_57609_20141119_223552_outLine +BABEL_OP3_404_57922_20141119_172249_inLine +BABEL_OP3_404_57922_20141119_172249_outLine +BABEL_OP3_404_57935_20141122_233816_inLine +BABEL_OP3_404_57935_20141122_233816_outLine +BABEL_OP3_404_58107_20141107_223929_inLine +BABEL_OP3_404_58107_20141107_223929_outLine +BABEL_OP3_404_58145_20141120_014653_inLine +BABEL_OP3_404_58145_20141120_014653_outLine +BABEL_OP3_404_58489_20141201_035927_inLine +BABEL_OP3_404_58489_20141201_035927_outLine +BABEL_OP3_404_58717_20141106_221300_inLine +BABEL_OP3_404_58717_20141106_221300_outLine +BABEL_OP3_404_58734_20141019_223233_inLine +BABEL_OP3_404_58734_20141019_223233_outLine +BABEL_OP3_404_58815_20141129_230108_inLine +BABEL_OP3_404_58815_20141129_230108_outLine +BABEL_OP3_404_58821_20141128_224222_inLine +BABEL_OP3_404_58821_20141128_224222_outLine +BABEL_OP3_404_58850_20141116_234915_inLine +BABEL_OP3_404_58850_20141116_234915_outLine +BABEL_OP3_404_58926_20141105_025457_inLine +BABEL_OP3_404_58926_20141105_025457_outLine +BABEL_OP3_404_59163_20150212_233430_inLine +BABEL_OP3_404_59163_20150212_233430_outLine +BABEL_OP3_404_59291_20141129_223855_inLine +BABEL_OP3_404_59291_20141129_223855_outLine +BABEL_OP3_404_59509_20141120_010036_inLine +BABEL_OP3_404_59509_20141120_010036_outLine +BABEL_OP3_404_59747_20141020_002625_inLine +BABEL_OP3_404_59747_20141020_002625_outLine +BABEL_OP3_404_59928_20141107_063850_inLine +BABEL_OP3_404_59928_20141107_063850_outLine +BABEL_OP3_404_59993_20141102_204023_inLine +BABEL_OP3_404_59993_20141102_204023_outLine +BABEL_OP3_404_60115_20141123_045055_inLine +BABEL_OP3_404_60115_20141123_045055_outLine +BABEL_OP3_404_60418_20141201_012853_inLine +BABEL_OP3_404_60418_20141201_012853_outLine +BABEL_OP3_404_60538_20141010_000421_inLine +BABEL_OP3_404_60538_20141010_000421_outLine +BABEL_OP3_404_60661_20141023_185331_inLine +BABEL_OP3_404_60661_20141023_185331_outLine +BABEL_OP3_404_60830_20141119_050849_inLine +BABEL_OP3_404_60830_20141119_050849_outLine +BABEL_OP3_404_60836_20141026_014449_inLine +BABEL_OP3_404_60836_20141026_014449_outLine +BABEL_OP3_404_61011_20141022_235244_inLine +BABEL_OP3_404_61011_20141022_235244_outLine +BABEL_OP3_404_61357_20141118_052326_inLine +BABEL_OP3_404_61357_20141118_052326_outLine +BABEL_OP3_404_61731_20141026_185743_inLine +BABEL_OP3_404_61731_20141026_185743_outLine +BABEL_OP3_404_62014_20141120_021455_inLine +BABEL_OP3_404_62014_20141120_021455_outLine +BABEL_OP3_404_62177_20150503_025324_inLine +BABEL_OP3_404_62177_20150503_025324_outLine 
+BABEL_OP3_404_62200_20141115_024033_inLine +BABEL_OP3_404_62200_20141115_024033_outLine +BABEL_OP3_404_62289_20150526_045908_inLine +BABEL_OP3_404_62289_20150526_045908_outLine +BABEL_OP3_404_62430_20150526_181036_inLine +BABEL_OP3_404_62430_20150526_181036_outLine +BABEL_OP3_404_62434_20141019_201121_inLine +BABEL_OP3_404_62434_20141019_201121_outLine +BABEL_OP3_404_62656_20150119_185511_inLine +BABEL_OP3_404_62656_20150119_185511_outLine +BABEL_OP3_404_62800_20141020_020318_inLine +BABEL_OP3_404_62800_20141020_020318_outLine +BABEL_OP3_404_62835_20141119_043323_inLine +BABEL_OP3_404_62835_20141119_043323_outLine +BABEL_OP3_404_62976_20141119_061748_inLine +BABEL_OP3_404_62976_20141119_061748_outLine +BABEL_OP3_404_63307_20141119_192444_inLine +BABEL_OP3_404_63307_20141119_192444_outLine +BABEL_OP3_404_63445_20141021_013007_inLine +BABEL_OP3_404_63445_20141021_013007_outLine +BABEL_OP3_404_63523_20150512_050203_inLine +BABEL_OP3_404_63523_20150512_050203_outLine +BABEL_OP3_404_63604_20141011_021042_inLine +BABEL_OP3_404_63604_20141011_021042_outLine +BABEL_OP3_404_63787_20141010_225937_inLine +BABEL_OP3_404_63787_20141010_225937_outLine +BABEL_OP3_404_63938_20150526_052814_inLine +BABEL_OP3_404_63938_20150526_052814_outLine +BABEL_OP3_404_64350_20141022_195842_inLine +BABEL_OP3_404_64350_20141022_195842_outLine +BABEL_OP3_404_64398_20141126_031756_inLine +BABEL_OP3_404_64398_20141126_031756_outLine +BABEL_OP3_404_64902_20150522_041540_inLine +BABEL_OP3_404_64902_20150522_041540_outLine +BABEL_OP3_404_65064_20141127_003631_inLine +BABEL_OP3_404_65064_20141127_003631_outLine +BABEL_OP3_404_65077_20141015_025834_inLine +BABEL_OP3_404_65077_20141015_025834_outLine +BABEL_OP3_404_65466_20150524_182317_inLine +BABEL_OP3_404_65466_20150524_182317_outLine +BABEL_OP3_404_65477_20141115_020305_inLine +BABEL_OP3_404_65477_20141115_020305_outLine +BABEL_OP3_404_65692_20141117_074414_inLine +BABEL_OP3_404_65692_20141117_074414_outLine +BABEL_OP3_404_65723_20141102_051040_inLine +BABEL_OP3_404_65723_20141102_051040_outLine +BABEL_OP3_404_65882_20141024_191236_inLine +BABEL_OP3_404_65882_20141024_191236_outLine +BABEL_OP3_404_66001_20141006_015944_inLine +BABEL_OP3_404_66001_20141006_015944_outLine +BABEL_OP3_404_66026_20141130_061639_inLine +BABEL_OP3_404_66026_20141130_061639_outLine +BABEL_OP3_404_66350_20150212_043953_inLine +BABEL_OP3_404_66350_20150212_043953_outLine +BABEL_OP3_404_66959_20141130_212725_inLine +BABEL_OP3_404_66959_20141130_212725_outLine +BABEL_OP3_404_66975_20150119_001417_inLine +BABEL_OP3_404_66975_20150119_001417_outLine +BABEL_OP3_404_67066_20150611_043029_inLine +BABEL_OP3_404_67066_20150611_043029_outLine +BABEL_OP3_404_67283_20141008_234315_inLine +BABEL_OP3_404_67283_20141008_234315_outLine +BABEL_OP3_404_67373_20141106_191525_inLine +BABEL_OP3_404_67373_20141106_191525_outLine +BABEL_OP3_404_67373_20141106_192955_inLine +BABEL_OP3_404_67373_20141106_192955_outLine +BABEL_OP3_404_67622_20141021_002234_inLine +BABEL_OP3_404_67622_20141021_002234_outLine +BABEL_OP3_404_67659_20141101_010904_inLine +BABEL_OP3_404_67659_20141101_010904_outLine +BABEL_OP3_404_67964_20150515_011635_inLine +BABEL_OP3_404_67964_20150515_011635_outLine +BABEL_OP3_404_68040_20141118_235516_inLine +BABEL_OP3_404_68040_20141118_235516_outLine +BABEL_OP3_404_68748_20141123_003226_inLine +BABEL_OP3_404_68748_20141123_003226_outLine +BABEL_OP3_404_68854_20150512_025452_inLine +BABEL_OP3_404_68854_20150512_025452_outLine +BABEL_OP3_404_68924_20141119_025325_inLine 
+BABEL_OP3_404_68924_20141119_025325_outLine +BABEL_OP3_404_69992_20141014_035441_inLine +BABEL_OP3_404_69992_20141014_035441_outLine +BABEL_OP3_404_70110_20141020_043016_inLine +BABEL_OP3_404_70110_20141020_043016_outLine +BABEL_OP3_404_70251_20141009_221726_inLine +BABEL_OP3_404_70251_20141009_221726_outLine +BABEL_OP3_404_70293_20150118_220441_inLine +BABEL_OP3_404_70293_20150118_220441_outLine +BABEL_OP3_404_70343_20141126_030147_inLine +BABEL_OP3_404_70343_20141126_030147_outLine +BABEL_OP3_404_70386_20141029_002717_inLine +BABEL_OP3_404_70386_20141029_002717_outLine +BABEL_OP3_404_70452_20141028_031043_inLine +BABEL_OP3_404_70452_20141028_031043_outLine +BABEL_OP3_404_70601_20141103_194852_inLine +BABEL_OP3_404_70601_20141103_194852_outLine +BABEL_OP3_404_71704_20141021_001821_inLine +BABEL_OP3_404_71704_20141021_001821_outLine +BABEL_OP3_404_71704_20141021_002603_inLine +BABEL_OP3_404_71704_20141021_002603_outLine +BABEL_OP3_404_72007_20141201_045843_inLine +BABEL_OP3_404_72007_20141201_045843_outLine +BABEL_OP3_404_72040_20141103_035957_inLine +BABEL_OP3_404_72040_20141103_035957_outLine +BABEL_OP3_404_72040_20141103_042101_inLine +BABEL_OP3_404_72040_20141103_042101_outLine +BABEL_OP3_404_72110_20141128_013317_inLine +BABEL_OP3_404_72110_20141128_013317_outLine +BABEL_OP3_404_72324_20141201_013717_inLine +BABEL_OP3_404_72324_20141201_013717_outLine +BABEL_OP3_404_72654_20141110_003307_inLine +BABEL_OP3_404_72654_20141110_003307_outLine +BABEL_OP3_404_73042_20141022_163748_inLine +BABEL_OP3_404_73042_20141022_163748_outLine +BABEL_OP3_404_73301_20141101_210322_inLine +BABEL_OP3_404_73301_20141101_210322_outLine +BABEL_OP3_404_73446_20150513_002217_inLine +BABEL_OP3_404_73446_20150513_002217_outLine +BABEL_OP3_404_73511_20141129_045420_inLine +BABEL_OP3_404_73511_20141129_045420_outLine +BABEL_OP3_404_73549_20150619_204148_inLine +BABEL_OP3_404_73549_20150619_204148_outLine +BABEL_OP3_404_73591_20141018_022404_inLine +BABEL_OP3_404_73591_20141018_022404_outLine +BABEL_OP3_404_73622_20141016_060513_inLine +BABEL_OP3_404_73622_20141016_060513_outLine +BABEL_OP3_404_73814_20141120_180559_inLine +BABEL_OP3_404_73814_20141120_180559_outLine +BABEL_OP3_404_74226_20141130_235823_inLine +BABEL_OP3_404_74226_20141130_235823_outLine +BABEL_OP3_404_74253_20141201_231036_inLine +BABEL_OP3_404_74253_20141201_231036_outLine +BABEL_OP3_404_74280_20141010_230433_inLine +BABEL_OP3_404_74280_20141010_230433_outLine +BABEL_OP3_404_74667_20141114_221123_inLine +BABEL_OP3_404_74667_20141114_221123_outLine +BABEL_OP3_404_74886_20141022_200909_inLine +BABEL_OP3_404_74886_20141022_200909_outLine +BABEL_OP3_404_74921_20141124_030609_inLine +BABEL_OP3_404_74921_20141124_030609_outLine +BABEL_OP3_404_75223_20141012_224637_inLine +BABEL_OP3_404_75223_20141012_224637_outLine +BABEL_OP3_404_75342_20141130_193132_inLine +BABEL_OP3_404_75342_20141130_193132_outLine +BABEL_OP3_404_75930_20150206_063407_inLine +BABEL_OP3_404_75930_20150206_063407_outLine +BABEL_OP3_404_75993_20141102_192754_inLine +BABEL_OP3_404_75993_20141102_192754_outLine +BABEL_OP3_404_76155_20141118_052757_inLine +BABEL_OP3_404_76155_20141118_052757_outLine +BABEL_OP3_404_76218_20141119_232010_inLine +BABEL_OP3_404_76218_20141119_232010_outLine +BABEL_OP3_404_76499_20141117_005535_inLine +BABEL_OP3_404_76499_20141117_005535_outLine +BABEL_OP3_404_76756_20141120_014151_inLine +BABEL_OP3_404_76756_20141120_014151_outLine +BABEL_OP3_404_77033_20150503_233304_inLine +BABEL_OP3_404_77033_20150503_233304_outLine 
+BABEL_OP3_404_77112_20141105_062419_inLine +BABEL_OP3_404_77112_20141105_062419_outLine +BABEL_OP3_404_77139_20141022_022951_inLine +BABEL_OP3_404_77139_20141022_022951_outLine +BABEL_OP3_404_77744_20141103_034001_inLine +BABEL_OP3_404_77744_20141103_034001_outLine +BABEL_OP3_404_78116_20141128_231322_inLine +BABEL_OP3_404_78116_20141128_231322_outLine +BABEL_OP3_404_78194_20141019_052949_inLine +BABEL_OP3_404_78194_20141019_052949_outLine +BABEL_OP3_404_78398_20141022_235403_inLine +BABEL_OP3_404_78398_20141022_235403_outLine +BABEL_OP3_404_78544_20141130_192658_inLine +BABEL_OP3_404_78544_20141130_192658_outLine +BABEL_OP3_404_78604_20141022_164244_inLine +BABEL_OP3_404_78604_20141022_164244_outLine +BABEL_OP3_404_78630_20141025_220904_inLine +BABEL_OP3_404_78630_20141025_220904_outLine +BABEL_OP3_404_78743_20141202_001451_inLine +BABEL_OP3_404_78743_20141202_001451_outLine +BABEL_OP3_404_78943_20141025_004503_inLine +BABEL_OP3_404_78943_20141025_004503_outLine +BABEL_OP3_404_79028_20150213_002817_inLine +BABEL_OP3_404_79028_20150213_002817_outLine +BABEL_OP3_404_79107_20150614_013139_inLine +BABEL_OP3_404_79107_20150614_013139_outLine +BABEL_OP3_404_79129_20141110_183305_inLine +BABEL_OP3_404_79129_20141110_183305_outLine +BABEL_OP3_404_79367_20141008_232735_inLine +BABEL_OP3_404_79367_20141008_232735_outLine +BABEL_OP3_404_79451_20141031_025601_inLine +BABEL_OP3_404_79451_20141031_025601_outLine +BABEL_OP3_404_79995_20141201_013108_inLine +BABEL_OP3_404_79995_20141201_013108_outLine +BABEL_OP3_404_80622_20141119_054644_inLine +BABEL_OP3_404_80622_20141119_054644_outLine +BABEL_OP3_404_80721_20141201_013404_inLine +BABEL_OP3_404_80721_20141201_013404_outLine +BABEL_OP3_404_81287_20141130_024232_inLine +BABEL_OP3_404_81287_20141130_024232_outLine +BABEL_OP3_404_81392_20141130_022613_inLine +BABEL_OP3_404_81392_20141130_022613_outLine +BABEL_OP3_404_81392_20141130_023326_inLine +BABEL_OP3_404_81392_20141130_023326_outLine +BABEL_OP3_404_81404_20141104_055546_inLine +BABEL_OP3_404_81404_20141104_055546_outLine +BABEL_OP3_404_81433_20141119_073031_inLine +BABEL_OP3_404_81433_20141119_073031_outLine +BABEL_OP3_404_81435_20141128_235050_inLine +BABEL_OP3_404_81435_20141128_235050_outLine +BABEL_OP3_404_81622_20141129_212937_inLine +BABEL_OP3_404_81622_20141129_212937_outLine +BABEL_OP3_404_81810_20141126_051528_inLine +BABEL_OP3_404_81810_20141126_051528_outLine +BABEL_OP3_404_82030_20150517_193420_inLine +BABEL_OP3_404_82030_20150517_193420_outLine +BABEL_OP3_404_82035_20141119_063429_inLine +BABEL_OP3_404_82035_20141119_063429_outLine +BABEL_OP3_404_82138_20141116_234338_inLine +BABEL_OP3_404_82138_20141116_234338_outLine +BABEL_OP3_404_82140_20141117_021927_inLine +BABEL_OP3_404_82140_20141117_021927_outLine +BABEL_OP3_404_82145_20150502_232707_inLine +BABEL_OP3_404_82145_20150502_232707_outLine +BABEL_OP3_404_82391_20141128_063323_inLine +BABEL_OP3_404_82391_20141128_063323_outLine +BABEL_OP3_404_82496_20141009_062659_inLine +BABEL_OP3_404_82496_20141009_062659_outLine +BABEL_OP3_404_82622_20141008_042910_inLine +BABEL_OP3_404_82622_20141008_042910_outLine +BABEL_OP3_404_82904_20150523_231750_inLine +BABEL_OP3_404_82904_20150523_231750_outLine +BABEL_OP3_404_83455_20141112_000643_inLine +BABEL_OP3_404_83455_20141112_000643_outLine +BABEL_OP3_404_83783_20141115_005815_inLine +BABEL_OP3_404_83783_20141115_005815_outLine +BABEL_OP3_404_83935_20141201_214527_inLine +BABEL_OP3_404_83935_20141201_214527_outLine +BABEL_OP3_404_84327_20141130_185722_inLine 
+BABEL_OP3_404_84327_20141130_185722_outLine +BABEL_OP3_404_84408_20141105_182756_inLine +BABEL_OP3_404_84408_20141105_182756_outLine +BABEL_OP3_404_84469_20141130_030156_inLine +BABEL_OP3_404_84469_20141130_030156_outLine +BABEL_OP3_404_84547_20141022_025230_inLine +BABEL_OP3_404_84547_20141022_025230_outLine +BABEL_OP3_404_84605_20141026_234127_inLine +BABEL_OP3_404_84605_20141026_234127_outLine +BABEL_OP3_404_84611_20141024_005352_inLine +BABEL_OP3_404_84611_20141024_005352_outLine +BABEL_OP3_404_84768_20141012_183416_inLine +BABEL_OP3_404_84768_20141012_183416_outLine +BABEL_OP3_404_84823_20141201_061552_inLine +BABEL_OP3_404_84823_20141201_061552_outLine +BABEL_OP3_404_84936_20141130_025359_inLine +BABEL_OP3_404_84936_20141130_025359_outLine +BABEL_OP3_404_85647_20141111_231451_inLine +BABEL_OP3_404_85647_20141111_231451_outLine +BABEL_OP3_404_86321_20141127_025302_inLine +BABEL_OP3_404_86321_20141127_025302_outLine +BABEL_OP3_404_86433_20141201_005203_inLine +BABEL_OP3_404_86433_20141201_005203_outLine +BABEL_OP3_404_86433_20141201_010208_inLine +BABEL_OP3_404_86433_20141201_010208_outLine +BABEL_OP3_404_86433_20141201_011757_inLine +BABEL_OP3_404_86433_20141201_011757_outLine +BABEL_OP3_404_86467_20141019_022847_inLine +BABEL_OP3_404_86467_20141019_022847_outLine +BABEL_OP3_404_86467_20141019_024243_inLine +BABEL_OP3_404_86467_20141019_024243_outLine +BABEL_OP3_404_86557_20141021_041027_inLine +BABEL_OP3_404_86557_20141021_041027_outLine +BABEL_OP3_404_86676_20141125_223657_inLine +BABEL_OP3_404_86676_20141125_223657_outLine +BABEL_OP3_404_86952_20141008_194318_inLine +BABEL_OP3_404_86952_20141008_194318_outLine +BABEL_OP3_404_87073_20141007_223759_inLine +BABEL_OP3_404_87073_20141007_223759_outLine +BABEL_OP3_404_87280_20141201_232519_inLine +BABEL_OP3_404_87280_20141201_232519_outLine +BABEL_OP3_404_87693_20141105_002311_inLine +BABEL_OP3_404_87693_20141105_002311_outLine +BABEL_OP3_404_88601_20141115_021916_inLine +BABEL_OP3_404_88601_20141115_021916_outLine +BABEL_OP3_404_88601_20141115_024632_inLine +BABEL_OP3_404_88601_20141115_024632_outLine +BABEL_OP3_404_88686_20141019_023828_inLine +BABEL_OP3_404_88686_20141019_023828_outLine +BABEL_OP3_404_88925_20141201_043633_inLine +BABEL_OP3_404_88925_20141201_043633_outLine +BABEL_OP3_404_88982_20141106_212556_inLine +BABEL_OP3_404_88982_20141106_212556_outLine +BABEL_OP3_404_89358_20141119_055634_inLine +BABEL_OP3_404_89358_20141119_055634_outLine +BABEL_OP3_404_89695_20141115_212119_inLine +BABEL_OP3_404_89695_20141115_212119_outLine +BABEL_OP3_404_89794_20141130_055655_inLine +BABEL_OP3_404_89794_20141130_055655_outLine +BABEL_OP3_404_89877_20141120_061055_inLine +BABEL_OP3_404_89877_20141120_061055_outLine +BABEL_OP3_404_90417_20150611_052409_inLine +BABEL_OP3_404_90417_20150611_052409_outLine +BABEL_OP3_404_90737_20141116_233627_inLine +BABEL_OP3_404_90737_20141116_233627_outLine +BABEL_OP3_404_90739_20141116_034352_inLine +BABEL_OP3_404_90739_20141116_034352_outLine +BABEL_OP3_404_90777_20141115_012657_inLine +BABEL_OP3_404_90777_20141115_012657_outLine +BABEL_OP3_404_90935_20141104_195620_inLine +BABEL_OP3_404_90935_20141104_195620_outLine +BABEL_OP3_404_91080_20141119_062453_inLine +BABEL_OP3_404_91080_20141119_062453_outLine +BABEL_OP3_404_91125_20141010_234127_inLine +BABEL_OP3_404_91125_20141010_234127_outLine +BABEL_OP3_404_91336_20141110_011202_inLine +BABEL_OP3_404_91336_20141110_011202_outLine +BABEL_OP3_404_92065_20141201_041019_inLine +BABEL_OP3_404_92065_20141201_041019_outLine 
+BABEL_OP3_404_92077_20150610_053919_inLine +BABEL_OP3_404_92077_20150610_053919_outLine +BABEL_OP3_404_92459_20141026_000227_inLine +BABEL_OP3_404_92459_20141026_000227_outLine +BABEL_OP3_404_92459_20141026_000839_inLine +BABEL_OP3_404_92459_20141026_000839_outLine +BABEL_OP3_404_92509_20141020_034921_inLine +BABEL_OP3_404_92509_20141020_034921_outLine +BABEL_OP3_404_92527_20141115_024550_inLine +BABEL_OP3_404_92527_20141115_024550_outLine +BABEL_OP3_404_92809_20141009_080406_inLine +BABEL_OP3_404_92809_20141009_080406_outLine +BABEL_OP3_404_92886_20141103_032433_inLine +BABEL_OP3_404_92886_20141103_032433_outLine +BABEL_OP3_404_92941_20141027_175733_inLine +BABEL_OP3_404_92941_20141027_175733_outLine +BABEL_OP3_404_92941_20141027_180356_inLine +BABEL_OP3_404_92941_20141027_180356_outLine +BABEL_OP3_404_93224_20141119_210156_inLine +BABEL_OP3_404_93224_20141119_210156_outLine +BABEL_OP3_404_93411_20141119_193212_inLine +BABEL_OP3_404_93411_20141119_193212_outLine +BABEL_OP3_404_93861_20141111_181324_inLine +BABEL_OP3_404_93861_20141111_181324_outLine +BABEL_OP3_404_93946_20141129_015946_inLine +BABEL_OP3_404_93946_20141129_015946_outLine +BABEL_OP3_404_93964_20141111_213251_inLine +BABEL_OP3_404_93964_20141111_213251_outLine +BABEL_OP3_404_94141_20150516_175827_inLine +BABEL_OP3_404_94141_20150516_175827_outLine +BABEL_OP3_404_94253_20141029_184039_inLine +BABEL_OP3_404_94253_20141029_184039_outLine +BABEL_OP3_404_94409_20141117_003829_inLine +BABEL_OP3_404_94409_20141117_003829_outLine +BABEL_OP3_404_94666_20141119_231115_inLine +BABEL_OP3_404_94666_20141119_231115_outLine +BABEL_OP3_404_94745_20141201_033432_inLine +BABEL_OP3_404_94745_20141201_033432_outLine +BABEL_OP3_404_94923_20141116_230334_inLine +BABEL_OP3_404_94923_20141116_230334_outLine +BABEL_OP3_404_94978_20150528_024921_inLine +BABEL_OP3_404_94978_20150528_024921_outLine +BABEL_OP3_404_95294_20141129_062228_inLine +BABEL_OP3_404_95294_20141129_062228_outLine +BABEL_OP3_404_95467_20150612_031400_inLine +BABEL_OP3_404_95467_20150612_031400_outLine +BABEL_OP3_404_95490_20141021_050016_inLine +BABEL_OP3_404_95490_20141021_050016_outLine +BABEL_OP3_404_95663_20141022_043520_inLine +BABEL_OP3_404_95663_20141022_043520_outLine +BABEL_OP3_404_95670_20141019_224431_inLine +BABEL_OP3_404_95670_20141019_224431_outLine +BABEL_OP3_404_95677_20150220_205948_inLine +BABEL_OP3_404_95677_20150220_205948_outLine +BABEL_OP3_404_95942_20150514_235402_inLine +BABEL_OP3_404_95942_20150514_235402_outLine +BABEL_OP3_404_96088_20150524_191148_inLine +BABEL_OP3_404_96088_20150524_191148_outLine +BABEL_OP3_404_96190_20141107_040725_inLine +BABEL_OP3_404_96190_20141107_040725_outLine +BABEL_OP3_404_96405_20141026_045704_inLine +BABEL_OP3_404_96405_20141026_045704_outLine +BABEL_OP3_404_96820_20141109_204448_inLine +BABEL_OP3_404_96820_20141109_204448_outLine +BABEL_OP3_404_96842_20150610_040559_inLine +BABEL_OP3_404_96842_20150610_040559_outLine +BABEL_OP3_404_96910_20141026_195400_inLine +BABEL_OP3_404_96910_20141026_195400_outLine +BABEL_OP3_404_96934_20141025_223703_inLine +BABEL_OP3_404_96934_20141025_223703_outLine +BABEL_OP3_404_96934_20141025_225156_inLine +BABEL_OP3_404_96934_20141025_225156_outLine +BABEL_OP3_404_96985_20141013_053332_inLine +BABEL_OP3_404_96985_20141013_053332_outLine +BABEL_OP3_404_97363_20141120_034843_inLine +BABEL_OP3_404_97363_20141120_034843_outLine +BABEL_OP3_404_97570_20141120_050344_inLine +BABEL_OP3_404_97570_20141120_050344_outLine +BABEL_OP3_404_98311_20141022_042555_inLine 
+BABEL_OP3_404_98311_20141022_042555_outLine +BABEL_OP3_404_98356_20141123_013523_inLine +BABEL_OP3_404_98356_20141123_013523_outLine +BABEL_OP3_404_98390_20141014_024134_inLine +BABEL_OP3_404_98390_20141014_024134_outLine +BABEL_OP3_404_98565_20150217_195949_inLine +BABEL_OP3_404_98565_20150217_195949_outLine +BABEL_OP3_404_98580_20141130_022138_inLine +BABEL_OP3_404_98580_20141130_022138_outLine +BABEL_OP3_404_98909_20141027_032903_inLine +BABEL_OP3_404_98909_20141027_032903_outLine +BABEL_OP3_404_99516_20141019_071828_inLine +BABEL_OP3_404_99516_20141019_071828_outLine diff --git a/egs/babel/s5d/conf/lists/404-georgian/sub-train.list b/egs/babel/s5d/conf/lists/404-georgian/sub-train.list new file mode 100644 index 00000000000..a042ee569ef --- /dev/null +++ b/egs/babel/s5d/conf/lists/404-georgian/sub-train.list @@ -0,0 +1,124 @@ +BABEL_OP3_404_11663_20141118_032146_inLine +BABEL_OP3_404_11663_20141118_032146_outLine +BABEL_OP3_404_12242_20141028_021853_inLine +BABEL_OP3_404_12242_20141028_021853_outLine +BABEL_OP3_404_13178_20141129_192909_inLine +BABEL_OP3_404_13178_20141129_192909_outLine +BABEL_OP3_404_14137_20141025_202817_inLine +BABEL_OP3_404_14137_20141025_202817_outLine +BABEL_OP3_404_14875_20141026_230227_inLine +BABEL_OP3_404_14875_20141026_230227_outLine +BABEL_OP3_404_15869_20150218_225936_inLine +BABEL_OP3_404_15869_20150218_225936_outLine +BABEL_OP3_404_17113_20150611_050102_inLine +BABEL_OP3_404_17113_20150611_050102_outLine +BABEL_OP3_404_23505_20141021_032033_inLine +BABEL_OP3_404_23505_20141021_032033_outLine +BABEL_OP3_404_24470_20141111_184651_inLine +BABEL_OP3_404_24470_20141111_184651_outLine +BABEL_OP3_404_24470_20141111_190229_inLine +BABEL_OP3_404_24470_20141111_190229_outLine +BABEL_OP3_404_24679_20141018_015615_inLine +BABEL_OP3_404_24679_20141018_015615_outLine +BABEL_OP3_404_26388_20141026_014207_inLine +BABEL_OP3_404_26388_20141026_014207_outLine +BABEL_OP3_404_27042_20141201_215107_inLine +BABEL_OP3_404_27042_20141201_215107_outLine +BABEL_OP3_404_28538_20141119_005526_inLine +BABEL_OP3_404_28538_20141119_005526_outLine +BABEL_OP3_404_29208_20141106_013309_inLine +BABEL_OP3_404_29208_20141106_013309_outLine +BABEL_OP3_404_30461_20150620_020316_inLine +BABEL_OP3_404_30461_20150620_020316_outLine +BABEL_OP3_404_31979_20141106_000523_inLine +BABEL_OP3_404_31979_20141106_000523_outLine +BABEL_OP3_404_31992_20141014_221817_inLine +BABEL_OP3_404_31992_20141014_221817_outLine +BABEL_OP3_404_37064_20141102_063308_inLine +BABEL_OP3_404_37064_20141102_063308_outLine +BABEL_OP3_404_37281_20141119_053453_inLine +BABEL_OP3_404_37281_20141119_053453_outLine +BABEL_OP3_404_37853_20150602_030625_inLine +BABEL_OP3_404_37853_20150602_030625_outLine +BABEL_OP3_404_40713_20141028_221207_inLine +BABEL_OP3_404_40713_20141028_221207_outLine +BABEL_OP3_404_41680_20141012_040411_inLine +BABEL_OP3_404_41680_20141012_040411_outLine +BABEL_OP3_404_41920_20141008_040539_inLine +BABEL_OP3_404_41920_20141008_040539_outLine +BABEL_OP3_404_42877_20150212_052937_inLine +BABEL_OP3_404_42877_20150212_052937_outLine +BABEL_OP3_404_45121_20150609_055234_inLine +BABEL_OP3_404_45121_20150609_055234_outLine +BABEL_OP3_404_46169_20141130_224339_inLine +BABEL_OP3_404_46169_20141130_224339_outLine +BABEL_OP3_404_46625_20141011_040505_inLine +BABEL_OP3_404_46625_20141011_040505_outLine +BABEL_OP3_404_46681_20141021_040451_inLine +BABEL_OP3_404_46681_20141021_040451_outLine +BABEL_OP3_404_47270_20150512_053415_inLine +BABEL_OP3_404_47270_20150512_053415_outLine 
+BABEL_OP3_404_48844_20141020_065414_inLine +BABEL_OP3_404_48844_20141020_065414_outLine +BABEL_OP3_404_49768_20141026_022902_inLine +BABEL_OP3_404_49768_20141026_022902_outLine +BABEL_OP3_404_50175_20141021_025726_inLine +BABEL_OP3_404_50175_20141021_025726_outLine +BABEL_OP3_404_52301_20141009_051739_inLine +BABEL_OP3_404_52301_20141009_051739_outLine +BABEL_OP3_404_52301_20141009_054049_inLine +BABEL_OP3_404_52301_20141009_054049_outLine +BABEL_OP3_404_52490_20141016_020323_inLine +BABEL_OP3_404_52490_20141016_020323_outLine +BABEL_OP3_404_56213_20141201_000837_inLine +BABEL_OP3_404_56213_20141201_000837_outLine +BABEL_OP3_404_58103_20141030_002209_inLine +BABEL_OP3_404_58103_20141030_002209_outLine +BABEL_OP3_404_59078_20141111_004941_inLine +BABEL_OP3_404_59078_20141111_004941_outLine +BABEL_OP3_404_61225_20141009_174003_inLine +BABEL_OP3_404_61225_20141009_174003_outLine +BABEL_OP3_404_63220_20141127_033605_inLine +BABEL_OP3_404_63220_20141127_033605_outLine +BABEL_OP3_404_64494_20141026_203549_inLine +BABEL_OP3_404_64494_20141026_203549_outLine +BABEL_OP3_404_64768_20141027_201818_inLine +BABEL_OP3_404_64768_20141027_201818_outLine +BABEL_OP3_404_66916_20141022_000731_inLine +BABEL_OP3_404_66916_20141022_000731_outLine +BABEL_OP3_404_67401_20141109_211809_inLine +BABEL_OP3_404_67401_20141109_211809_outLine +BABEL_OP3_404_68059_20141109_052011_inLine +BABEL_OP3_404_68059_20141109_052011_outLine +BABEL_OP3_404_68068_20141201_054518_inLine +BABEL_OP3_404_68068_20141201_054518_outLine +BABEL_OP3_404_68384_20141130_035214_inLine +BABEL_OP3_404_68384_20141130_035214_outLine +BABEL_OP3_404_68627_20141105_190511_inLine +BABEL_OP3_404_68627_20141105_190511_outLine +BABEL_OP3_404_72844_20141007_033837_inLine +BABEL_OP3_404_72844_20141007_033837_outLine +BABEL_OP3_404_73837_20141026_191037_inLine +BABEL_OP3_404_73837_20141026_191037_outLine +BABEL_OP3_404_78511_20141201_003606_inLine +BABEL_OP3_404_78511_20141201_003606_outLine +BABEL_OP3_404_79139_20141117_054733_inLine +BABEL_OP3_404_79139_20141117_054733_outLine +BABEL_OP3_404_81971_20141022_025641_inLine +BABEL_OP3_404_81971_20141022_025641_outLine +BABEL_OP3_404_83062_20150523_220236_inLine +BABEL_OP3_404_83062_20150523_220236_outLine +BABEL_OP3_404_83775_20141030_230742_inLine +BABEL_OP3_404_83775_20141030_230742_outLine +BABEL_OP3_404_84339_20150502_014143_inLine +BABEL_OP3_404_84339_20150502_014143_outLine +BABEL_OP3_404_86191_20141027_013544_inLine +BABEL_OP3_404_86191_20141027_013544_outLine +BABEL_OP3_404_86888_20141119_022459_inLine +BABEL_OP3_404_86888_20141119_022459_outLine +BABEL_OP3_404_95966_20141129_060246_inLine +BABEL_OP3_404_95966_20141129_060246_outLine +BABEL_OP3_404_97461_20141118_230730_inLine +BABEL_OP3_404_97461_20141118_230730_outLine +BABEL_OP3_404_99487_20141021_053024_inLine +BABEL_OP3_404_99487_20141021_053024_outLine diff --git a/egs/babel/s5d/conf/lists/404-georgian/sub-train.untranscribed.list b/egs/babel/s5d/conf/lists/404-georgian/sub-train.untranscribed.list new file mode 100644 index 00000000000..32d863a65ad --- /dev/null +++ b/egs/babel/s5d/conf/lists/404-georgian/sub-train.untranscribed.list @@ -0,0 +1,929 @@ +BABEL_OP3_404_10019_20141101_191932_inLine +BABEL_OP3_404_10019_20141101_191932_outLine +BABEL_OP3_404_10058_20150526_034808_inLine +BABEL_OP3_404_10411_20150611_172027_inLine +BABEL_OP3_404_10411_20150611_172027_outLine +BABEL_OP3_404_10416_20141117_064700_inLine +BABEL_OP3_404_10416_20141117_064700_outLine +BABEL_OP3_404_10647_20150514_001106_inLine +BABEL_OP3_404_10647_20150514_001106_outLine 
+BABEL_OP3_404_10938_20141030_023413_inLine +BABEL_OP3_404_10938_20141030_023413_outLine +BABEL_OP3_404_10974_20141119_205506_inLine +BABEL_OP3_404_10974_20141119_205506_outLine +BABEL_OP3_404_11352_20150513_002642_inLine +BABEL_OP3_404_11352_20150513_002642_outLine +BABEL_OP3_404_11673_20141023_035438_inLine +BABEL_OP3_404_11673_20141023_035438_outLine +BABEL_OP3_404_11681_20141107_190101_inLine +BABEL_OP3_404_11681_20141107_190101_outLine +BABEL_OP3_404_11859_20150611_041737_inLine +BABEL_OP3_404_11859_20150611_041737_outLine +BABEL_OP3_404_12220_20141116_205911_inLine +BABEL_OP3_404_12220_20141116_205911_outLine +BABEL_OP3_404_12609_20150524_172934_inLine +BABEL_OP3_404_12609_20150524_172934_outLine +BABEL_OP3_404_13030_20141101_200709_inLine +BABEL_OP3_404_13030_20141101_200709_outLine +BABEL_OP3_404_13126_20150524_221540_inLine +BABEL_OP3_404_13126_20150524_221540_outLine +BABEL_OP3_404_13324_20141022_200257_inLine +BABEL_OP3_404_13324_20141022_200257_outLine +BABEL_OP3_404_13664_20141012_013523_inLine +BABEL_OP3_404_13664_20141012_013523_outLine +BABEL_OP3_404_13709_20150512_015216_inLine +BABEL_OP3_404_13709_20150512_015216_outLine +BABEL_OP3_404_14158_20141130_030130_inLine +BABEL_OP3_404_14158_20141130_030130_outLine +BABEL_OP3_404_14229_20141029_200136_inLine +BABEL_OP3_404_14229_20141029_200136_outLine +BABEL_OP3_404_14237_20141006_171921_inLine +BABEL_OP3_404_14237_20141006_171921_outLine +BABEL_OP3_404_14440_20141127_213106_inLine +BABEL_OP3_404_14440_20141127_213106_outLine +BABEL_OP3_404_14807_20141110_231934_inLine +BABEL_OP3_404_14807_20141110_231934_outLine +BABEL_OP3_404_14899_20141022_202217_inLine +BABEL_OP3_404_14899_20141022_202217_outLine +BABEL_OP3_404_14929_20141129_192841_inLine +BABEL_OP3_404_14929_20141129_192841_outLine +BABEL_OP3_404_15024_20141118_234824_inLine +BABEL_OP3_404_15024_20141118_234824_outLine +BABEL_OP3_404_15042_20150506_232829_inLine +BABEL_OP3_404_15042_20150506_232829_outLine +BABEL_OP3_404_15382_20141130_213942_inLine +BABEL_OP3_404_15382_20141130_213942_outLine +BABEL_OP3_404_15535_20141129_021659_inLine +BABEL_OP3_404_15535_20141129_021659_outLine +BABEL_OP3_404_15638_20141127_220502_outLine +BABEL_OP3_404_15848_20141006_231138_inLine +BABEL_OP3_404_15848_20141006_231138_outLine +BABEL_OP3_404_15902_20141020_173105_outLine +BABEL_OP3_404_16149_20141010_173548_inLine +BABEL_OP3_404_16149_20141010_173548_outLine +BABEL_OP3_404_16467_20141130_014316_inLine +BABEL_OP3_404_16467_20141130_014316_outLine +BABEL_OP3_404_16467_20141130_015010_inLine +BABEL_OP3_404_16467_20141130_015010_outLine +BABEL_OP3_404_16475_20141116_052010_outLine +BABEL_OP3_404_16601_20141201_041704_inLine +BABEL_OP3_404_16601_20141201_041704_outLine +BABEL_OP3_404_17280_20141103_190330_inLine +BABEL_OP3_404_17280_20141103_190330_outLine +BABEL_OP3_404_17320_20150524_213213_inLine +BABEL_OP3_404_17320_20150524_213213_outLine +BABEL_OP3_404_17420_20150503_201902_inLine +BABEL_OP3_404_17420_20150503_201902_outLine +BABEL_OP3_404_17420_20150527_025815_inLine +BABEL_OP3_404_17420_20150527_025815_outLine +BABEL_OP3_404_17420_20150527_034621_inLine +BABEL_OP3_404_17420_20150527_034621_outLine +BABEL_OP3_404_17520_20141113_032534_inLine +BABEL_OP3_404_17567_20141117_182919_inLine +BABEL_OP3_404_17567_20141117_182919_outLine +BABEL_OP3_404_17573_20141129_035040_inLine +BABEL_OP3_404_17573_20141129_035040_outLine +BABEL_OP3_404_17615_20141201_025917_inLine +BABEL_OP3_404_17615_20141201_025917_outLine +BABEL_OP3_404_17890_20141128_040046_inLine 
+BABEL_OP3_404_17890_20141128_040046_outLine +BABEL_OP3_404_17923_20141022_231429_outLine +BABEL_OP3_404_18118_20150503_165936_inLine +BABEL_OP3_404_18118_20150503_165936_outLine +BABEL_OP3_404_18291_20150611_062705_outLine +BABEL_OP3_404_18291_20150611_063700_outLine +BABEL_OP3_404_18766_20150610_064349_inLine +BABEL_OP3_404_19120_20150525_014657_inLine +BABEL_OP3_404_19120_20150525_014657_outLine +BABEL_OP3_404_19120_20150525_015635_inLine +BABEL_OP3_404_19120_20150525_015635_outLine +BABEL_OP3_404_19134_20141120_053128_inLine +BABEL_OP3_404_19134_20141120_053128_outLine +BABEL_OP3_404_19703_20141027_004315_inLine +BABEL_OP3_404_19703_20141027_004315_outLine +BABEL_OP3_404_19877_20150506_202237_outLine +BABEL_OP3_404_20133_20141010_195231_inLine +BABEL_OP3_404_20133_20141010_195231_outLine +BABEL_OP3_404_20454_20150218_171143_inLine +BABEL_OP3_404_20454_20150218_171143_outLine +BABEL_OP3_404_20985_20141126_183236_inLine +BABEL_OP3_404_20985_20141126_183236_outLine +BABEL_OP3_404_21004_20141201_035831_inLine +BABEL_OP3_404_21004_20141201_035831_outLine +BABEL_OP3_404_21159_20150615_021612_inLine +BABEL_OP3_404_21435_20150523_030702_inLine +BABEL_OP3_404_21435_20150523_030702_outLine +BABEL_OP3_404_21581_20141101_011021_inLine +BABEL_OP3_404_21581_20141101_011021_outLine +BABEL_OP3_404_21807_20141112_225225_outLine +BABEL_OP3_404_22280_20141111_020522_inLine +BABEL_OP3_404_22280_20141111_020522_outLine +BABEL_OP3_404_22591_20150217_220714_inLine +BABEL_OP3_404_23046_20141031_030755_inLine +BABEL_OP3_404_23046_20141031_030755_outLine +BABEL_OP3_404_23731_20141130_033602_inLine +BABEL_OP3_404_23731_20141130_033602_outLine +BABEL_OP3_404_23980_20141106_225951_inLine +BABEL_OP3_404_23980_20141106_225951_outLine +BABEL_OP3_404_24209_20150212_224614_inLine +BABEL_OP3_404_24239_20150517_203015_inLine +BABEL_OP3_404_24270_20141111_012902_inLine +BABEL_OP3_404_24270_20141111_012902_outLine +BABEL_OP3_404_24323_20141117_020615_outLine +BABEL_OP3_404_24501_20150522_030231_inLine +BABEL_OP3_404_24532_20141007_211325_inLine +BABEL_OP3_404_24532_20141007_211325_outLine +BABEL_OP3_404_24586_20150524_190657_inLine +BABEL_OP3_404_24586_20150524_190657_outLine +BABEL_OP3_404_24589_20141031_020641_inLine +BABEL_OP3_404_24589_20141031_020641_outLine +BABEL_OP3_404_24590_20141116_230233_inLine +BABEL_OP3_404_24590_20141116_230233_outLine +BABEL_OP3_404_24982_20141102_021352_inLine +BABEL_OP3_404_24982_20141102_021352_outLine +BABEL_OP3_404_25068_20150206_022730_outLine +BABEL_OP3_404_25085_20150611_040906_inLine +BABEL_OP3_404_25085_20150611_040906_outLine +BABEL_OP3_404_25412_20141120_031532_inLine +BABEL_OP3_404_25412_20141120_031532_outLine +BABEL_OP3_404_25496_20150613_034126_inLine +BABEL_OP3_404_25496_20150613_034126_outLine +BABEL_OP3_404_26398_20150527_032152_inLine +BABEL_OP3_404_26398_20150527_032152_outLine +BABEL_OP3_404_26478_20150617_004029_inLine +BABEL_OP3_404_26478_20150617_004029_outLine +BABEL_OP3_404_26836_20141102_024528_inLine +BABEL_OP3_404_26836_20141102_024528_outLine +BABEL_OP3_404_27203_20141119_185720_inLine +BABEL_OP3_404_27203_20141119_185720_outLine +BABEL_OP3_404_27203_20141119_191138_inLine +BABEL_OP3_404_27203_20141119_191138_outLine +BABEL_OP3_404_27590_20141128_051454_inLine +BABEL_OP3_404_28280_20150619_024509_inLine +BABEL_OP3_404_28280_20150619_024509_outLine +BABEL_OP3_404_28280_20150619_025848_inLine +BABEL_OP3_404_28280_20150619_025848_outLine +BABEL_OP3_404_28303_20141028_182204_inLine +BABEL_OP3_404_28303_20141028_182204_outLine 
+BABEL_OP3_404_28522_20141124_222758_inLine +BABEL_OP3_404_28522_20141124_222758_outLine +BABEL_OP3_404_28600_20141201_223206_inLine +BABEL_OP3_404_28600_20141201_223206_outLine +BABEL_OP3_404_28871_20141019_181913_inLine +BABEL_OP3_404_28871_20141019_181913_outLine +BABEL_OP3_404_28945_20141104_060349_outLine +BABEL_OP3_404_29039_20141128_035839_inLine +BABEL_OP3_404_29039_20141128_035839_outLine +BABEL_OP3_404_29076_20141109_215142_inLine +BABEL_OP3_404_29076_20141109_215142_outLine +BABEL_OP3_404_29230_20150611_051340_inLine +BABEL_OP3_404_29230_20150611_051340_outLine +BABEL_OP3_404_29439_20150524_201524_inLine +BABEL_OP3_404_29439_20150524_201524_outLine +BABEL_OP3_404_30098_20150610_150504_inLine +BABEL_OP3_404_30098_20150610_150504_outLine +BABEL_OP3_404_30432_20141126_052839_inLine +BABEL_OP3_404_30432_20141126_052839_outLine +BABEL_OP3_404_30497_20150525_194737_inLine +BABEL_OP3_404_30497_20150525_194737_outLine +BABEL_OP3_404_30645_20141019_220859_inLine +BABEL_OP3_404_30653_20150514_014515_inLine +BABEL_OP3_404_31267_20150615_011004_outLine +BABEL_OP3_404_31484_20141122_232804_inLine +BABEL_OP3_404_31484_20141122_232804_outLine +BABEL_OP3_404_31624_20141105_214349_inLine +BABEL_OP3_404_31624_20141105_214349_outLine +BABEL_OP3_404_31919_20150526_220911_inLine +BABEL_OP3_404_31919_20150526_220911_outLine +BABEL_OP3_404_32122_20141115_022841_inLine +BABEL_OP3_404_32122_20141115_022841_outLine +BABEL_OP3_404_32287_20150210_060823_inLine +BABEL_OP3_404_32287_20150210_060823_outLine +BABEL_OP3_404_32630_20150609_012137_inLine +BABEL_OP3_404_32630_20150609_012137_outLine +BABEL_OP3_404_32708_20141106_032826_inLine +BABEL_OP3_404_32708_20141106_032826_outLine +BABEL_OP3_404_32727_20141128_203500_inLine +BABEL_OP3_404_32727_20141128_203500_outLine +BABEL_OP3_404_32727_20141128_204751_inLine +BABEL_OP3_404_32727_20141128_204751_outLine +BABEL_OP3_404_32959_20141201_005331_inLine +BABEL_OP3_404_32959_20141201_005331_outLine +BABEL_OP3_404_32998_20141112_054111_inLine +BABEL_OP3_404_33355_20141019_032024_inLine +BABEL_OP3_404_33355_20141019_032024_outLine +BABEL_OP3_404_33355_20141019_034109_inLine +BABEL_OP3_404_33355_20141019_034109_outLine +BABEL_OP3_404_33704_20141207_073436_inLine +BABEL_OP3_404_33704_20141207_073436_outLine +BABEL_OP3_404_34328_20141119_054513_outLine +BABEL_OP3_404_34328_20141119_055432_outLine +BABEL_OP3_404_34679_20141102_052808_inLine +BABEL_OP3_404_34679_20141102_052808_outLine +BABEL_OP3_404_34688_20141009_073303_inLine +BABEL_OP3_404_34688_20141009_073303_outLine +BABEL_OP3_404_34811_20141109_001009_inLine +BABEL_OP3_404_34811_20141109_001009_outLine +BABEL_OP3_404_34899_20150611_060602_outLine +BABEL_OP3_404_35008_20141201_023042_inLine +BABEL_OP3_404_35008_20141201_023042_outLine +BABEL_OP3_404_35143_20141130_181111_inLine +BABEL_OP3_404_35143_20141130_181111_outLine +BABEL_OP3_404_35181_20150526_211416_inLine +BABEL_OP3_404_35181_20150526_211416_outLine +BABEL_OP3_404_35706_20150523_015900_inLine +BABEL_OP3_404_35706_20150523_015900_outLine +BABEL_OP3_404_35786_20150604_015518_inLine +BABEL_OP3_404_35786_20150604_015518_outLine +BABEL_OP3_404_36017_20150528_192934_inLine +BABEL_OP3_404_36017_20150528_192934_outLine +BABEL_OP3_404_36039_20150526_230125_inLine +BABEL_OP3_404_36039_20150526_230125_outLine +BABEL_OP3_404_36059_20150601_023254_inLine +BABEL_OP3_404_36059_20150601_023254_outLine +BABEL_OP3_404_36059_20150601_033346_inLine +BABEL_OP3_404_36059_20150601_033346_outLine +BABEL_OP3_404_36147_20150211_013803_outLine 
+BABEL_OP3_404_36219_20141104_012216_inLine +BABEL_OP3_404_36219_20141104_012216_outLine +BABEL_OP3_404_36642_20150610_161207_inLine +BABEL_OP3_404_36642_20150610_161207_outLine +BABEL_OP3_404_37290_20141115_050457_inLine +BABEL_OP3_404_37290_20141115_050457_outLine +BABEL_OP3_404_37598_20141119_045926_inLine +BABEL_OP3_404_37598_20141119_045926_outLine +BABEL_OP3_404_37682_20141101_221445_inLine +BABEL_OP3_404_37682_20141101_221445_outLine +BABEL_OP3_404_38125_20150526_233108_inLine +BABEL_OP3_404_38125_20150526_233108_outLine +BABEL_OP3_404_38323_20150615_021843_inLine +BABEL_OP3_404_38340_20141103_231545_inLine +BABEL_OP3_404_38340_20141103_231545_outLine +BABEL_OP3_404_38554_20141010_224451_inLine +BABEL_OP3_404_38554_20141010_224451_outLine +BABEL_OP3_404_38588_20141118_163844_inLine +BABEL_OP3_404_38588_20141118_163844_outLine +BABEL_OP3_404_38664_20141030_175135_inLine +BABEL_OP3_404_38664_20141030_175135_outLine +BABEL_OP3_404_38979_20150503_202406_outLine +BABEL_OP3_404_39099_20150511_053646_outLine +BABEL_OP3_404_39307_20141022_200554_inLine +BABEL_OP3_404_39307_20141022_201758_inLine +BABEL_OP3_404_39426_20150527_181901_outLine +BABEL_OP3_404_39744_20141023_002710_inLine +BABEL_OP3_404_39893_20150611_034149_inLine +BABEL_OP3_404_39920_20150503_205354_outLine +BABEL_OP3_404_40557_20141127_200639_inLine +BABEL_OP3_404_40557_20141127_200639_outLine +BABEL_OP3_404_40939_20150210_212748_inLine +BABEL_OP3_404_40939_20150210_212748_outLine +BABEL_OP3_404_41097_20141129_055801_inLine +BABEL_OP3_404_41097_20141129_055801_outLine +BABEL_OP3_404_41100_20141021_022126_inLine +BABEL_OP3_404_41100_20141021_022126_outLine +BABEL_OP3_404_41272_20150503_232941_inLine +BABEL_OP3_404_41334_20150617_041322_inLine +BABEL_OP3_404_41400_20150515_021408_inLine +BABEL_OP3_404_41609_20141009_013405_inLine +BABEL_OP3_404_41609_20141009_013405_outLine +BABEL_OP3_404_41692_20150604_005657_inLine +BABEL_OP3_404_41692_20150604_005657_outLine +BABEL_OP3_404_41745_20141114_235452_inLine +BABEL_OP3_404_41745_20141114_235452_outLine +BABEL_OP3_404_41958_20141029_212755_inLine +BABEL_OP3_404_41958_20141029_212755_outLine +BABEL_OP3_404_42155_20141127_055149_inLine +BABEL_OP3_404_42619_20141130_012456_outLine +BABEL_OP3_404_42834_20141125_004837_inLine +BABEL_OP3_404_42834_20141125_004837_outLine +BABEL_OP3_404_42883_20150604_035732_inLine +BABEL_OP3_404_42883_20150604_035732_outLine +BABEL_OP3_404_43368_20141031_010629_inLine +BABEL_OP3_404_43368_20141031_010629_outLine +BABEL_OP3_404_43388_20141114_212210_inLine +BABEL_OP3_404_43388_20141114_214120_inLine +BABEL_OP3_404_43588_20150517_233637_inLine +BABEL_OP3_404_43789_20141120_011327_outLine +BABEL_OP3_404_44114_20150614_012319_inLine +BABEL_OP3_404_44114_20150614_012319_outLine +BABEL_OP3_404_44309_20150525_022635_inLine +BABEL_OP3_404_44309_20150525_022635_outLine +BABEL_OP3_404_44477_20141201_180604_inLine +BABEL_OP3_404_44477_20141201_180604_outLine +BABEL_OP3_404_44478_20150512_225118_inLine +BABEL_OP3_404_44847_20141130_221248_inLine +BABEL_OP3_404_44847_20141130_221248_outLine +BABEL_OP3_404_45106_20141119_050859_inLine +BABEL_OP3_404_45106_20141119_050859_outLine +BABEL_OP3_404_45374_20150122_014830_outLine +BABEL_OP3_404_45374_20150122_015920_outLine +BABEL_OP3_404_45459_20150525_020410_inLine +BABEL_OP3_404_45459_20150525_020410_outLine +BABEL_OP3_404_45560_20141012_030417_inLine +BABEL_OP3_404_45560_20141012_030417_outLine +BABEL_OP3_404_45699_20150205_021829_inLine +BABEL_OP3_404_45851_20150514_155157_inLine 
+BABEL_OP3_404_45851_20150514_155157_outLine +BABEL_OP3_404_45908_20150515_004218_outLine +BABEL_OP3_404_46268_20141019_032022_inLine +BABEL_OP3_404_46268_20141019_032022_outLine +BABEL_OP3_404_46310_20141015_051100_inLine +BABEL_OP3_404_46310_20141015_051100_outLine +BABEL_OP3_404_46315_20141129_012912_inLine +BABEL_OP3_404_46315_20141129_012912_outLine +BABEL_OP3_404_46550_20141105_072519_inLine +BABEL_OP3_404_46550_20141105_072519_outLine +BABEL_OP3_404_46688_20141015_211329_inLine +BABEL_OP3_404_46688_20141015_211329_outLine +BABEL_OP3_404_46712_20141027_224004_inLine +BABEL_OP3_404_46712_20141027_224004_outLine +BABEL_OP3_404_46881_20141012_020055_inLine +BABEL_OP3_404_46881_20141012_020055_outLine +BABEL_OP3_404_46974_20141128_055136_inLine +BABEL_OP3_404_46974_20141128_055136_outLine +BABEL_OP3_404_46976_20141107_183806_inLine +BABEL_OP3_404_46976_20141107_183806_outLine +BABEL_OP3_404_47156_20150625_025324_inLine +BABEL_OP3_404_47156_20150625_025324_outLine +BABEL_OP3_404_47802_20141110_200430_inLine +BABEL_OP3_404_47802_20141110_200430_outLine +BABEL_OP3_404_47823_20141201_044425_inLine +BABEL_OP3_404_47823_20141201_044425_outLine +BABEL_OP3_404_48016_20150615_000741_inLine +BABEL_OP3_404_48016_20150615_000741_outLine +BABEL_OP3_404_48243_20141023_200903_inLine +BABEL_OP3_404_48243_20141023_200903_outLine +BABEL_OP3_404_48610_20141013_011505_inLine +BABEL_OP3_404_48610_20141013_012904_inLine +BABEL_OP3_404_48663_20150512_202837_inLine +BABEL_OP3_404_48663_20150512_202837_outLine +BABEL_OP3_404_49197_20141117_024730_inLine +BABEL_OP3_404_49197_20141117_024730_outLine +BABEL_OP3_404_49306_20150524_003356_inLine +BABEL_OP3_404_49306_20150524_003356_outLine +BABEL_OP3_404_49630_20141128_020114_inLine +BABEL_OP3_404_49630_20141128_020114_outLine +BABEL_OP3_404_49767_20150613_050113_inLine +BABEL_OP3_404_49767_20150613_050113_outLine +BABEL_OP3_404_49775_20141011_005306_inLine +BABEL_OP3_404_49775_20141011_005306_outLine +BABEL_OP3_404_49902_20141101_175534_inLine +BABEL_OP3_404_49902_20141101_175534_outLine +BABEL_OP3_404_49907_20141103_050534_inLine +BABEL_OP3_404_49907_20141103_050534_outLine +BABEL_OP3_404_49945_20150610_154709_inLine +BABEL_OP3_404_50601_20141127_032527_inLine +BABEL_OP3_404_50601_20141127_032527_outLine +BABEL_OP3_404_50745_20150513_162805_inLine +BABEL_OP3_404_50745_20150513_162805_outLine +BABEL_OP3_404_50779_20141115_012852_inLine +BABEL_OP3_404_50779_20141115_012852_outLine +BABEL_OP3_404_50810_20141007_234432_inLine +BABEL_OP3_404_50810_20141007_234432_outLine +BABEL_OP3_404_51015_20141123_193824_inLine +BABEL_OP3_404_51015_20141123_193824_outLine +BABEL_OP3_404_51414_20150604_001601_inLine +BABEL_OP3_404_51414_20150604_001601_outLine +BABEL_OP3_404_51484_20141202_000325_inLine +BABEL_OP3_404_51484_20141202_000325_outLine +BABEL_OP3_404_51701_20150620_010924_outLine +BABEL_OP3_404_52070_20150620_014422_outLine +BABEL_OP3_404_52070_20150620_020559_outLine +BABEL_OP3_404_52246_20141118_035022_inLine +BABEL_OP3_404_52246_20141118_035022_outLine +BABEL_OP3_404_52246_20141118_040850_inLine +BABEL_OP3_404_52246_20141118_040850_outLine +BABEL_OP3_404_52404_20141125_004855_inLine +BABEL_OP3_404_52404_20141125_004855_outLine +BABEL_OP3_404_52725_20150522_222730_inLine +BABEL_OP3_404_52725_20150522_222730_outLine +BABEL_OP3_404_53063_20141201_005237_inLine +BABEL_OP3_404_53063_20141201_005237_outLine +BABEL_OP3_404_53072_20150518_015132_inLine +BABEL_OP3_404_53415_20150503_225920_inLine +BABEL_OP3_404_53415_20150503_225920_outLine 
+BABEL_OP3_404_53492_20150525_055025_inLine +BABEL_OP3_404_53492_20150525_055025_outLine +BABEL_OP3_404_53665_20150526_004549_inLine +BABEL_OP3_404_53917_20150503_205456_outLine +BABEL_OP3_404_53957_20141201_051933_inLine +BABEL_OP3_404_54104_20141008_214620_inLine +BABEL_OP3_404_54104_20141008_214620_outLine +BABEL_OP3_404_54160_20141009_180704_inLine +BABEL_OP3_404_54160_20141009_180704_outLine +BABEL_OP3_404_54160_20141009_184719_inLine +BABEL_OP3_404_54160_20141009_184719_outLine +BABEL_OP3_404_54160_20141009_185557_inLine +BABEL_OP3_404_54160_20141009_185557_outLine +BABEL_OP3_404_54405_20141117_054820_inLine +BABEL_OP3_404_54405_20141117_054820_outLine +BABEL_OP3_404_54477_20141211_033627_inLine +BABEL_OP3_404_54477_20141211_033627_outLine +BABEL_OP3_404_54744_20141015_012011_inLine +BABEL_OP3_404_54744_20141015_012011_outLine +BABEL_OP3_404_55013_20150525_222257_inLine +BABEL_OP3_404_55013_20150525_222257_outLine +BABEL_OP3_404_55259_20141029_225631_inLine +BABEL_OP3_404_55259_20141029_225631_outLine +BABEL_OP3_404_55267_20141130_212756_inLine +BABEL_OP3_404_55349_20150523_031602_inLine +BABEL_OP3_404_55349_20150523_031602_outLine +BABEL_OP3_404_56019_20150502_020750_inLine +BABEL_OP3_404_56019_20150502_020750_outLine +BABEL_OP3_404_56076_20150516_164959_inLine +BABEL_OP3_404_56076_20150516_164959_outLine +BABEL_OP3_404_56331_20150526_020747_inLine +BABEL_OP3_404_56331_20150526_020747_outLine +BABEL_OP3_404_56743_20141114_223719_inLine +BABEL_OP3_404_56743_20141114_223719_outLine +BABEL_OP3_404_57065_20141201_002920_inLine +BABEL_OP3_404_57219_20150618_045613_inLine +BABEL_OP3_404_57219_20150618_045613_outLine +BABEL_OP3_404_57464_20150523_224617_inLine +BABEL_OP3_404_57542_20150526_233832_inLine +BABEL_OP3_404_57542_20150526_233832_outLine +BABEL_OP3_404_57542_20150526_235003_inLine +BABEL_OP3_404_57542_20150526_235003_outLine +BABEL_OP3_404_57654_20141023_235628_inLine +BABEL_OP3_404_57654_20141023_235628_outLine +BABEL_OP3_404_57678_20141104_023128_inLine +BABEL_OP3_404_57678_20141104_023128_outLine +BABEL_OP3_404_57919_20150127_041057_inLine +BABEL_OP3_404_57919_20150127_041057_outLine +BABEL_OP3_404_58006_20150526_024205_inLine +BABEL_OP3_404_58006_20150526_024205_outLine +BABEL_OP3_404_58026_20150615_004130_inLine +BABEL_OP3_404_58026_20150615_004130_outLine +BABEL_OP3_404_58915_20150611_034220_outLine +BABEL_OP3_404_59262_20141130_212633_inLine +BABEL_OP3_404_59262_20141130_212633_outLine +BABEL_OP3_404_59307_20150504_003405_inLine +BABEL_OP3_404_59307_20150504_003405_outLine +BABEL_OP3_404_59720_20141029_204612_inLine +BABEL_OP3_404_59720_20141029_204612_outLine +BABEL_OP3_404_59864_20150602_014458_inLine +BABEL_OP3_404_60026_20141008_051633_inLine +BABEL_OP3_404_60026_20141008_051633_outLine +BABEL_OP3_404_60299_20150611_040929_inLine +BABEL_OP3_404_60310_20141130_231532_inLine +BABEL_OP3_404_60310_20141130_231532_outLine +BABEL_OP3_404_60352_20141201_060712_inLine +BABEL_OP3_404_60352_20141201_060712_outLine +BABEL_OP3_404_60352_20141201_061821_inLine +BABEL_OP3_404_60352_20141201_061821_outLine +BABEL_OP3_404_60458_20150609_021527_inLine +BABEL_OP3_404_60458_20150609_021527_outLine +BABEL_OP3_404_60474_20141029_182816_inLine +BABEL_OP3_404_60474_20141029_182816_outLine +BABEL_OP3_404_60477_20150613_223056_inLine +BABEL_OP3_404_60477_20150613_224002_inLine +BABEL_OP3_404_60498_20150606_022221_inLine +BABEL_OP3_404_60498_20150606_022221_outLine +BABEL_OP3_404_60626_20141028_212539_inLine +BABEL_OP3_404_60626_20141028_212539_outLine 
+BABEL_OP3_404_60706_20141020_215729_inLine +BABEL_OP3_404_60706_20141020_215729_outLine +BABEL_OP3_404_61167_20141030_222711_inLine +BABEL_OP3_404_61167_20141030_222711_outLine +BABEL_OP3_404_61219_20141025_193634_inLine +BABEL_OP3_404_61219_20141025_193634_outLine +BABEL_OP3_404_61678_20141019_201928_inLine +BABEL_OP3_404_61678_20141019_201928_outLine +BABEL_OP3_404_61873_20141108_214852_inLine +BABEL_OP3_404_61873_20141108_214852_outLine +BABEL_OP3_404_61888_20150504_171019_inLine +BABEL_OP3_404_61971_20150525_020101_outLine +BABEL_OP3_404_62155_20150522_032307_inLine +BABEL_OP3_404_62155_20150522_032307_outLine +BABEL_OP3_404_62286_20141105_204359_inLine +BABEL_OP3_404_62286_20141105_204359_outLine +BABEL_OP3_404_62360_20150517_033230_inLine +BABEL_OP3_404_62360_20150517_033230_outLine +BABEL_OP3_404_62456_20141108_202333_inLine +BABEL_OP3_404_62456_20141108_202333_outLine +BABEL_OP3_404_62714_20150522_011337_inLine +BABEL_OP3_404_62714_20150522_011337_outLine +BABEL_OP3_404_62724_20141130_200827_inLine +BABEL_OP3_404_62724_20141130_200827_outLine +BABEL_OP3_404_62734_20141029_221513_inLine +BABEL_OP3_404_62734_20141029_221513_outLine +BABEL_OP3_404_62852_20141013_054854_outLine +BABEL_OP3_404_63081_20141021_032233_inLine +BABEL_OP3_404_63081_20141021_032233_outLine +BABEL_OP3_404_63081_20141021_033457_inLine +BABEL_OP3_404_63081_20141021_033457_outLine +BABEL_OP3_404_63084_20141130_221452_inLine +BABEL_OP3_404_63084_20141130_221452_outLine +BABEL_OP3_404_63425_20141126_054504_inLine +BABEL_OP3_404_63481_20141020_221014_outLine +BABEL_OP3_404_63481_20141020_224225_outLine +BABEL_OP3_404_63670_20141130_050318_inLine +BABEL_OP3_404_63670_20141130_050318_outLine +BABEL_OP3_404_63757_20141111_180721_inLine +BABEL_OP3_404_63757_20141111_180721_outLine +BABEL_OP3_404_63906_20150525_050310_inLine +BABEL_OP3_404_63906_20150525_050310_outLine +BABEL_OP3_404_63999_20150610_041309_inLine +BABEL_OP3_404_64014_20150503_032745_inLine +BABEL_OP3_404_64014_20150503_032745_outLine +BABEL_OP3_404_64722_20150514_034208_outLine +BABEL_OP3_404_64759_20141014_044027_inLine +BABEL_OP3_404_64759_20141014_045519_inLine +BABEL_OP3_404_64796_20141022_055826_inLine +BABEL_OP3_404_64870_20141108_192546_inLine +BABEL_OP3_404_64870_20141108_192546_outLine +BABEL_OP3_404_65561_20141124_060558_inLine +BABEL_OP3_404_65561_20141124_060558_outLine +BABEL_OP3_404_65640_20150528_211835_inLine +BABEL_OP3_404_65640_20150528_211835_outLine +BABEL_OP3_404_66045_20141117_035937_inLine +BABEL_OP3_404_66045_20141117_035937_outLine +BABEL_OP3_404_66177_20150503_202932_inLine +BABEL_OP3_404_66177_20150503_202932_outLine +BABEL_OP3_404_66822_20141117_020953_inLine +BABEL_OP3_404_66822_20141117_020953_outLine +BABEL_OP3_404_66967_20141008_202611_inLine +BABEL_OP3_404_66967_20141008_202611_outLine +BABEL_OP3_404_67152_20150503_201836_inLine +BABEL_OP3_404_67152_20150503_201836_outLine +BABEL_OP3_404_67304_20150211_054416_inLine +BABEL_OP3_404_67304_20150211_054416_outLine +BABEL_OP3_404_67552_20141126_011955_inLine +BABEL_OP3_404_67552_20141126_011955_outLine +BABEL_OP3_404_67842_20141104_051753_inLine +BABEL_OP3_404_67842_20141104_051753_outLine +BABEL_OP3_404_68244_20141119_065540_inLine +BABEL_OP3_404_68244_20141119_065540_outLine +BABEL_OP3_404_68306_20141126_180315_inLine +BABEL_OP3_404_68306_20141126_180315_outLine +BABEL_OP3_404_68385_20141017_031005_inLine +BABEL_OP3_404_68385_20141017_031005_outLine +BABEL_OP3_404_68823_20150212_041147_inLine +BABEL_OP3_404_68823_20150212_041147_outLine 
+BABEL_OP3_404_69096_20150512_165126_inLine +BABEL_OP3_404_69096_20150512_165126_outLine +BABEL_OP3_404_69107_20141120_010459_inLine +BABEL_OP3_404_69107_20141120_010459_outLine +BABEL_OP3_404_69153_20141130_221412_inLine +BABEL_OP3_404_69153_20141130_221412_outLine +BABEL_OP3_404_69153_20141130_222842_inLine +BABEL_OP3_404_69153_20141130_222842_outLine +BABEL_OP3_404_69474_20141128_051323_outLine +BABEL_OP3_404_69574_20141006_023156_inLine +BABEL_OP3_404_69574_20141006_023156_outLine +BABEL_OP3_404_69578_20141117_003921_inLine +BABEL_OP3_404_69578_20141117_003921_outLine +BABEL_OP3_404_69633_20141129_051648_inLine +BABEL_OP3_404_69633_20141129_051648_outLine +BABEL_OP3_404_69636_20141126_061322_inLine +BABEL_OP3_404_69636_20141126_061322_outLine +BABEL_OP3_404_69885_20150503_011226_inLine +BABEL_OP3_404_69885_20150503_011226_outLine +BABEL_OP3_404_69937_20150620_015912_inLine +BABEL_OP3_404_69964_20150524_015556_inLine +BABEL_OP3_404_69964_20150524_015556_outLine +BABEL_OP3_404_69982_20150625_035440_outLine +BABEL_OP3_404_70121_20141104_202610_inLine +BABEL_OP3_404_70121_20141104_202610_outLine +BABEL_OP3_404_70221_20141124_052004_inLine +BABEL_OP3_404_70221_20141124_052004_outLine +BABEL_OP3_404_70282_20141111_000251_inLine +BABEL_OP3_404_70282_20141111_000251_outLine +BABEL_OP3_404_70460_20150527_015340_inLine +BABEL_OP3_404_70460_20150527_015340_outLine +BABEL_OP3_404_70526_20150501_015444_inLine +BABEL_OP3_404_70526_20150501_015444_outLine +BABEL_OP3_404_70713_20150527_013058_inLine +BABEL_OP3_404_70713_20150527_013058_outLine +BABEL_OP3_404_70794_20141021_185105_inLine +BABEL_OP3_404_70794_20141021_185105_outLine +BABEL_OP3_404_71189_20150523_005918_inLine +BABEL_OP3_404_71189_20150523_005918_outLine +BABEL_OP3_404_71263_20141119_234747_inLine +BABEL_OP3_404_71263_20141119_234747_outLine +BABEL_OP3_404_71278_20150211_052730_inLine +BABEL_OP3_404_71278_20150211_052730_outLine +BABEL_OP3_404_71278_20150211_054040_inLine +BABEL_OP3_404_71278_20150211_054040_outLine +BABEL_OP3_404_71333_20141102_023503_inLine +BABEL_OP3_404_71333_20141102_023503_outLine +BABEL_OP3_404_71401_20150206_070446_inLine +BABEL_OP3_404_71401_20150206_070446_outLine +BABEL_OP3_404_71404_20141023_215509_inLine +BABEL_OP3_404_71404_20141023_215509_outLine +BABEL_OP3_404_71460_20150206_015309_outLine +BABEL_OP3_404_71559_20141210_220929_outLine +BABEL_OP3_404_71566_20141130_035713_inLine +BABEL_OP3_404_71566_20141130_035713_outLine +BABEL_OP3_404_71566_20141130_040359_inLine +BABEL_OP3_404_71566_20141130_040359_outLine +BABEL_OP3_404_71780_20141105_055543_inLine +BABEL_OP3_404_71780_20141105_055543_outLine +BABEL_OP3_404_72319_20150502_041426_inLine +BABEL_OP3_404_72319_20150502_041426_outLine +BABEL_OP3_404_72733_20150515_044419_inLine +BABEL_OP3_404_72733_20150515_044419_outLine +BABEL_OP3_404_73072_20141012_012029_inLine +BABEL_OP3_404_73072_20141012_012029_outLine +BABEL_OP3_404_73119_20141026_232203_inLine +BABEL_OP3_404_73119_20141026_232203_outLine +BABEL_OP3_404_73258_20141117_010123_inLine +BABEL_OP3_404_73258_20141117_010123_outLine +BABEL_OP3_404_73485_20150512_234636_inLine +BABEL_OP3_404_73485_20150512_234636_outLine +BABEL_OP3_404_73964_20150512_205010_inLine +BABEL_OP3_404_73964_20150512_205010_outLine +BABEL_OP3_404_74641_20141108_223951_inLine +BABEL_OP3_404_74641_20141108_223951_outLine +BABEL_OP3_404_74728_20150503_042547_inLine +BABEL_OP3_404_74728_20150503_042547_outLine +BABEL_OP3_404_74799_20141109_222638_inLine +BABEL_OP3_404_74799_20141109_222638_outLine 
+BABEL_OP3_404_75465_20141129_223330_outLine +BABEL_OP3_404_75869_20150527_230650_inLine +BABEL_OP3_404_75869_20150527_230650_outLine +BABEL_OP3_404_75975_20150127_051140_outLine +BABEL_OP3_404_76126_20141201_202238_inLine +BABEL_OP3_404_76126_20141201_202238_outLine +BABEL_OP3_404_76238_20141129_223455_inLine +BABEL_OP3_404_76238_20141129_223455_outLine +BABEL_OP3_404_76372_20150601_014341_inLine +BABEL_OP3_404_76372_20150601_014341_outLine +BABEL_OP3_404_76437_20141019_202715_inLine +BABEL_OP3_404_76437_20141019_202715_outLine +BABEL_OP3_404_76444_20141127_032124_inLine +BABEL_OP3_404_76444_20141127_032124_outLine +BABEL_OP3_404_76482_20150618_063131_outLine +BABEL_OP3_404_76683_20141110_191551_inLine +BABEL_OP3_404_76683_20141110_191551_outLine +BABEL_OP3_404_76837_20150124_222250_outLine +BABEL_OP3_404_76970_20150625_191722_inLine +BABEL_OP3_404_77126_20141022_202348_inLine +BABEL_OP3_404_77126_20141022_202348_outLine +BABEL_OP3_404_77146_20141019_060916_inLine +BABEL_OP3_404_77242_20150612_024655_inLine +BABEL_OP3_404_77391_20141026_222314_inLine +BABEL_OP3_404_77391_20141026_222314_outLine +BABEL_OP3_404_77427_20141030_192713_inLine +BABEL_OP3_404_77427_20141030_192713_outLine +BABEL_OP3_404_77567_20141021_021210_inLine +BABEL_OP3_404_77567_20141021_021210_outLine +BABEL_OP3_404_77730_20141014_201059_inLine +BABEL_OP3_404_77730_20141014_201059_outLine +BABEL_OP3_404_77803_20141020_030844_inLine +BABEL_OP3_404_77803_20141020_030844_outLine +BABEL_OP3_404_77990_20141024_215822_inLine +BABEL_OP3_404_77990_20141024_215822_outLine +BABEL_OP3_404_78016_20141029_233059_inLine +BABEL_OP3_404_78016_20141029_233059_outLine +BABEL_OP3_404_78254_20141025_202742_inLine +BABEL_OP3_404_78254_20141025_202742_outLine +BABEL_OP3_404_78254_20141025_204922_inLine +BABEL_OP3_404_78254_20141025_204922_outLine +BABEL_OP3_404_78454_20141115_043455_inLine +BABEL_OP3_404_78749_20150620_025728_inLine +BABEL_OP3_404_78749_20150620_025728_outLine +BABEL_OP3_404_78976_20141025_183704_inLine +BABEL_OP3_404_78976_20141025_183704_outLine +BABEL_OP3_404_79190_20141108_232204_inLine +BABEL_OP3_404_79190_20141108_232204_outLine +BABEL_OP3_404_79590_20141129_025808_outLine +BABEL_OP3_404_79751_20141101_232250_inLine +BABEL_OP3_404_79751_20141101_232250_outLine +BABEL_OP3_404_79820_20141104_045340_inLine +BABEL_OP3_404_79820_20141104_045340_outLine +BABEL_OP3_404_79858_20141015_200446_inLine +BABEL_OP3_404_79898_20150620_022648_inLine +BABEL_OP3_404_79898_20150620_022648_outLine +BABEL_OP3_404_79898_20150620_024014_inLine +BABEL_OP3_404_79898_20150620_024014_outLine +BABEL_OP3_404_80069_20150614_233606_inLine +BABEL_OP3_404_80069_20150614_233606_outLine +BABEL_OP3_404_80306_20141119_003833_inLine +BABEL_OP3_404_80306_20141119_003833_outLine +BABEL_OP3_404_80306_20141119_005121_inLine +BABEL_OP3_404_80306_20141119_005121_outLine +BABEL_OP3_404_80439_20141026_005410_inLine +BABEL_OP3_404_80439_20141026_005410_outLine +BABEL_OP3_404_80559_20141022_010255_inLine +BABEL_OP3_404_80655_20150525_221544_inLine +BABEL_OP3_404_80655_20150525_221544_outLine +BABEL_OP3_404_80897_20141119_233718_inLine +BABEL_OP3_404_80897_20141119_233718_outLine +BABEL_OP3_404_81149_20150525_003741_inLine +BABEL_OP3_404_81149_20150525_003741_outLine +BABEL_OP3_404_81213_20141102_205052_inLine +BABEL_OP3_404_81213_20141102_205052_outLine +BABEL_OP3_404_81229_20141117_041745_inLine +BABEL_OP3_404_81229_20141117_041745_outLine +BABEL_OP3_404_81427_20141030_015136_inLine +BABEL_OP3_404_81427_20141030_015136_outLine 
+BABEL_OP3_404_81854_20150610_060437_inLine +BABEL_OP3_404_82089_20141117_045302_inLine +BABEL_OP3_404_82089_20141117_045302_outLine +BABEL_OP3_404_82303_20150614_024236_inLine +BABEL_OP3_404_82303_20150614_024236_outLine +BABEL_OP3_404_82473_20141026_060037_inLine +BABEL_OP3_404_82473_20141026_060037_outLine +BABEL_OP3_404_82626_20150615_014517_inLine +BABEL_OP3_404_82637_20141021_010105_inLine +BABEL_OP3_404_82637_20141021_010105_outLine +BABEL_OP3_404_82742_20141201_234306_inLine +BABEL_OP3_404_82742_20141201_234306_outLine +BABEL_OP3_404_82863_20141119_044230_inLine +BABEL_OP3_404_82863_20141119_044230_outLine +BABEL_OP3_404_83238_20141119_180953_inLine +BABEL_OP3_404_83238_20141119_180953_outLine +BABEL_OP3_404_83366_20141120_192208_inLine +BABEL_OP3_404_83366_20141120_192208_outLine +BABEL_OP3_404_83651_20141102_170912_inLine +BABEL_OP3_404_83651_20141102_170912_outLine +BABEL_OP3_404_83771_20150604_012300_outLine +BABEL_OP3_404_83851_20141028_203735_inLine +BABEL_OP3_404_83851_20141028_203735_outLine +BABEL_OP3_404_83929_20141018_184023_inLine +BABEL_OP3_404_83929_20141018_184023_outLine +BABEL_OP3_404_83974_20150617_022055_inLine +BABEL_OP3_404_84055_20150504_002015_inLine +BABEL_OP3_404_84055_20150504_002015_outLine +BABEL_OP3_404_84061_20141030_205021_inLine +BABEL_OP3_404_84061_20141030_205021_outLine +BABEL_OP3_404_84125_20141018_023340_inLine +BABEL_OP3_404_84125_20141018_023340_outLine +BABEL_OP3_404_84458_20141130_053628_outLine +BABEL_OP3_404_84815_20141127_011952_inLine +BABEL_OP3_404_84815_20141127_013345_inLine +BABEL_OP3_404_85047_20141117_014630_inLine +BABEL_OP3_404_85047_20141117_014630_outLine +BABEL_OP3_404_85048_20141127_023704_inLine +BABEL_OP3_404_85048_20141127_023704_outLine +BABEL_OP3_404_85254_20150620_035606_inLine +BABEL_OP3_404_85254_20150620_035606_outLine +BABEL_OP3_404_85322_20141008_235518_inLine +BABEL_OP3_404_85322_20141008_235518_outLine +BABEL_OP3_404_85340_20141103_022707_inLine +BABEL_OP3_404_85340_20141103_022707_outLine +BABEL_OP3_404_85651_20141211_032650_inLine +BABEL_OP3_404_85651_20141211_032650_outLine +BABEL_OP3_404_86472_20141201_011325_inLine +BABEL_OP3_404_86472_20141201_011325_outLine +BABEL_OP3_404_86597_20150612_170328_inLine +BABEL_OP3_404_86597_20150612_170328_outLine +BABEL_OP3_404_86635_20141127_204158_inLine +BABEL_OP3_404_86635_20141127_204158_outLine +BABEL_OP3_404_86722_20141029_192140_inLine +BABEL_OP3_404_86722_20141029_192140_outLine +BABEL_OP3_404_87074_20141105_190107_outLine +BABEL_OP3_404_87470_20141114_214639_inLine +BABEL_OP3_404_87470_20141114_214639_outLine +BABEL_OP3_404_87629_20141127_020403_inLine +BABEL_OP3_404_87629_20141127_020403_outLine +BABEL_OP3_404_87777_20141127_040747_inLine +BABEL_OP3_404_87777_20141127_040747_outLine +BABEL_OP3_404_87871_20141201_023608_inLine +BABEL_OP3_404_87871_20141201_023608_outLine +BABEL_OP3_404_87921_20141201_023029_inLine +BABEL_OP3_404_87921_20141201_023029_outLine +BABEL_OP3_404_88260_20141103_234824_inLine +BABEL_OP3_404_88260_20141103_234824_outLine +BABEL_OP3_404_88445_20141119_043713_inLine +BABEL_OP3_404_88445_20141119_043713_outLine +BABEL_OP3_404_88661_20141127_025208_inLine +BABEL_OP3_404_88661_20141127_025208_outLine +BABEL_OP3_404_88669_20141119_000147_inLine +BABEL_OP3_404_88669_20141119_000147_outLine +BABEL_OP3_404_88783_20141201_045305_inLine +BABEL_OP3_404_88783_20141201_045305_outLine +BABEL_OP3_404_88873_20141028_190127_inLine +BABEL_OP3_404_88873_20141028_190127_outLine +BABEL_OP3_404_89045_20141022_193202_inLine 
+BABEL_OP3_404_89045_20141022_193202_outLine +BABEL_OP3_404_89330_20150616_002908_inLine +BABEL_OP3_404_89330_20150616_002908_outLine +BABEL_OP3_404_89372_20141010_000950_inLine +BABEL_OP3_404_89372_20141010_000950_outLine +BABEL_OP3_404_89650_20150220_222402_inLine +BABEL_OP3_404_89650_20150220_222402_outLine +BABEL_OP3_404_89650_20150220_224606_inLine +BABEL_OP3_404_89650_20150220_224606_outLine +BABEL_OP3_404_89665_20141103_202723_inLine +BABEL_OP3_404_89665_20141103_202723_outLine +BABEL_OP3_404_89943_20141105_211847_outLine +BABEL_OP3_404_90347_20141119_012016_inLine +BABEL_OP3_404_90347_20141119_012016_outLine +BABEL_OP3_404_90760_20150611_151739_inLine +BABEL_OP3_404_90760_20150611_151739_outLine +BABEL_OP3_404_90832_20150616_012728_inLine +BABEL_OP3_404_90832_20150616_012728_outLine +BABEL_OP3_404_90930_20150119_021352_inLine +BABEL_OP3_404_90930_20150119_021352_outLine +BABEL_OP3_404_91383_20150618_035815_inLine +BABEL_OP3_404_91463_20141116_023036_inLine +BABEL_OP3_404_91463_20141116_023036_outLine +BABEL_OP3_404_91475_20150614_034536_inLine +BABEL_OP3_404_91581_20141129_045608_inLine +BABEL_OP3_404_91581_20141129_045608_outLine +BABEL_OP3_404_91581_20141129_050730_inLine +BABEL_OP3_404_91581_20141129_050730_outLine +BABEL_OP3_404_91593_20150611_021825_inLine +BABEL_OP3_404_91593_20150611_021825_outLine +BABEL_OP3_404_91825_20141009_181224_inLine +BABEL_OP3_404_91825_20141009_181224_outLine +BABEL_OP3_404_91825_20141009_183843_inLine +BABEL_OP3_404_91825_20141009_183843_outLine +BABEL_OP3_404_91884_20150503_022858_inLine +BABEL_OP3_404_91884_20150503_022858_outLine +BABEL_OP3_404_91888_20150512_191012_inLine +BABEL_OP3_404_91888_20150512_191012_outLine +BABEL_OP3_404_91891_20141129_005825_inLine +BABEL_OP3_404_91891_20141129_005825_outLine +BABEL_OP3_404_91944_20141022_021002_inLine +BABEL_OP3_404_91971_20150217_041455_inLine +BABEL_OP3_404_91971_20150217_041455_outLine +BABEL_OP3_404_91977_20141122_230420_outLine +BABEL_OP3_404_92176_20141119_195614_inLine +BABEL_OP3_404_92176_20141119_195614_outLine +BABEL_OP3_404_92281_20150625_185123_inLine +BABEL_OP3_404_92698_20141117_072302_inLine +BABEL_OP3_404_92698_20141117_072302_outLine +BABEL_OP3_404_92736_20141201_011442_inLine +BABEL_OP3_404_92736_20141201_011442_outLine +BABEL_OP3_404_92757_20150525_200048_inLine +BABEL_OP3_404_92757_20150525_200048_outLine +BABEL_OP3_404_92792_20150503_182854_outLine +BABEL_OP3_404_92792_20150525_025523_outLine +BABEL_OP3_404_92942_20141120_022830_inLine +BABEL_OP3_404_92942_20141120_022830_outLine +BABEL_OP3_404_93007_20150615_051230_inLine +BABEL_OP3_404_93007_20150615_051230_outLine +BABEL_OP3_404_93858_20150611_043732_inLine +BABEL_OP3_404_94002_20141119_015307_inLine +BABEL_OP3_404_94002_20141119_015307_outLine +BABEL_OP3_404_94025_20141129_180207_inLine +BABEL_OP3_404_94025_20141129_180207_outLine +BABEL_OP3_404_94333_20141020_024439_outLine +BABEL_OP3_404_94487_20150518_005132_outLine +BABEL_OP3_404_94869_20141007_194254_inLine +BABEL_OP3_404_94869_20141007_194254_outLine +BABEL_OP3_404_95077_20141201_055702_outLine +BABEL_OP3_404_95269_20141105_221810_inLine +BABEL_OP3_404_95269_20141105_221810_outLine +BABEL_OP3_404_95338_20150610_211203_inLine +BABEL_OP3_404_95338_20150610_211203_outLine +BABEL_OP3_404_95399_20141119_001023_inLine +BABEL_OP3_404_95399_20141119_001023_outLine +BABEL_OP3_404_95583_20141019_010741_inLine +BABEL_OP3_404_95583_20141019_010741_outLine +BABEL_OP3_404_96059_20150524_042224_outLine +BABEL_OP3_404_96205_20141119_033053_inLine 
+BABEL_OP3_404_96205_20141119_033053_outLine +BABEL_OP3_404_96205_20141119_034909_inLine +BABEL_OP3_404_96205_20141119_034909_outLine +BABEL_OP3_404_96247_20150526_202623_outLine +BABEL_OP3_404_96376_20150503_033706_inLine +BABEL_OP3_404_96376_20150503_033706_outLine +BABEL_OP3_404_96504_20141103_031329_inLine +BABEL_OP3_404_96504_20141103_031329_outLine +BABEL_OP3_404_96690_20141117_053054_inLine +BABEL_OP3_404_96690_20141117_053054_outLine +BABEL_OP3_404_96808_20150609_034129_inLine +BABEL_OP3_404_97097_20150601_042649_outLine +BABEL_OP3_404_97136_20150528_011250_inLine +BABEL_OP3_404_97136_20150528_011250_outLine +BABEL_OP3_404_97557_20141119_230718_inLine +BABEL_OP3_404_97557_20141119_230718_outLine +BABEL_OP3_404_97588_20141018_234016_inLine +BABEL_OP3_404_97588_20141018_234016_outLine +BABEL_OP3_404_97588_20141018_235425_inLine +BABEL_OP3_404_97588_20141018_235425_outLine +BABEL_OP3_404_97896_20141116_221329_inLine +BABEL_OP3_404_97896_20141116_221329_outLine +BABEL_OP3_404_97911_20150613_195820_outLine +BABEL_OP3_404_97988_20141201_030306_inLine +BABEL_OP3_404_97988_20141201_030306_outLine +BABEL_OP3_404_98165_20141030_214051_inLine +BABEL_OP3_404_98165_20141030_214051_outLine +BABEL_OP3_404_98192_20150617_021906_outLine +BABEL_OP3_404_98489_20141102_002030_inLine +BABEL_OP3_404_98489_20141102_004054_inLine +BABEL_OP3_404_98678_20150528_021605_inLine +BABEL_OP3_404_98678_20150528_023029_inLine +BABEL_OP3_404_98888_20141113_212715_inLine +BABEL_OP3_404_98888_20141113_212715_outLine +BABEL_OP3_404_99202_20141108_210814_inLine +BABEL_OP3_404_99202_20141108_210814_outLine +BABEL_OP3_404_99289_20150521_220314_inLine +BABEL_OP3_404_99289_20150521_220314_outLine +BABEL_OP3_404_99289_20150521_222144_inLine +BABEL_OP3_404_99289_20150521_222144_outLine +BABEL_OP3_404_99594_20141105_194545_inLine +BABEL_OP3_404_99594_20141105_194545_outLine +BABEL_OP3_404_99718_20141019_051850_inLine +BABEL_OP3_404_99718_20141019_051850_outLine +BABEL_OP3_404_99718_20141019_053305_inLine +BABEL_OP3_404_99718_20141019_053305_outLine +BABEL_OP3_404_99732_20141130_232553_inLine +BABEL_OP3_404_99732_20141130_232553_outLine +BABEL_OP3_404_99813_20141120_025129_inLine +BABEL_OP3_404_99813_20141120_025129_outLine +BABEL_OP3_404_99920_20141022_052026_inLine diff --git a/egs/babel/s5d/conf/lists/404-georgian/training.list b/egs/babel/s5d/conf/lists/404-georgian/training.list new file mode 100644 index 00000000000..efc0afb8219 --- /dev/null +++ b/egs/babel/s5d/conf/lists/404-georgian/training.list @@ -0,0 +1,518 @@ +BABEL_OP3_404_10019_20141101_191932_inLine +BABEL_OP3_404_10019_20141101_191932_outLine +BABEL_OP3_404_10416_20141117_064700_inLine +BABEL_OP3_404_10416_20141117_064700_outLine +BABEL_OP3_404_10647_20150514_001106_inLine +BABEL_OP3_404_10647_20150514_001106_outLine +BABEL_OP3_404_10974_20141119_205506_inLine +BABEL_OP3_404_10974_20141119_205506_outLine +BABEL_OP3_404_11663_20141118_032146_inLine +BABEL_OP3_404_11663_20141118_032146_outLine +BABEL_OP3_404_11673_20141023_035438_inLine +BABEL_OP3_404_11673_20141023_035438_outLine +BABEL_OP3_404_11681_20141107_190101_inLine +BABEL_OP3_404_11681_20141107_190101_outLine +BABEL_OP3_404_12242_20141028_021853_inLine +BABEL_OP3_404_12242_20141028_021853_outLine +BABEL_OP3_404_13030_20141101_200709_inLine +BABEL_OP3_404_13030_20141101_200709_outLine +BABEL_OP3_404_13178_20141129_192909_inLine +BABEL_OP3_404_13178_20141129_192909_outLine +BABEL_OP3_404_13324_20141022_200257_inLine +BABEL_OP3_404_13324_20141022_200257_outLine +BABEL_OP3_404_13664_20141012_013523_inLine 
+BABEL_OP3_404_13664_20141012_013523_outLine +BABEL_OP3_404_13709_20150512_015216_inLine +BABEL_OP3_404_13709_20150512_015216_outLine +BABEL_OP3_404_14137_20141025_202817_inLine +BABEL_OP3_404_14137_20141025_202817_outLine +BABEL_OP3_404_14229_20141029_200136_inLine +BABEL_OP3_404_14229_20141029_200136_outLine +BABEL_OP3_404_14237_20141006_171921_inLine +BABEL_OP3_404_14237_20141006_171921_outLine +BABEL_OP3_404_14440_20141127_213106_inLine +BABEL_OP3_404_14440_20141127_213106_outLine +BABEL_OP3_404_14807_20141110_231934_inLine +BABEL_OP3_404_14807_20141110_231934_outLine +BABEL_OP3_404_14875_20141026_230227_inLine +BABEL_OP3_404_14875_20141026_230227_outLine +BABEL_OP3_404_14899_20141022_202217_inLine +BABEL_OP3_404_14899_20141022_202217_outLine +BABEL_OP3_404_14929_20141129_192841_inLine +BABEL_OP3_404_14929_20141129_192841_outLine +BABEL_OP3_404_15382_20141130_213942_inLine +BABEL_OP3_404_15382_20141130_213942_outLine +BABEL_OP3_404_15848_20141006_231138_inLine +BABEL_OP3_404_15848_20141006_231138_outLine +BABEL_OP3_404_15869_20150218_225936_inLine +BABEL_OP3_404_15869_20150218_225936_outLine +BABEL_OP3_404_16149_20141010_173548_inLine +BABEL_OP3_404_16149_20141010_173548_outLine +BABEL_OP3_404_16467_20141130_014316_inLine +BABEL_OP3_404_16467_20141130_014316_outLine +BABEL_OP3_404_16467_20141130_015010_inLine +BABEL_OP3_404_16467_20141130_015010_outLine +BABEL_OP3_404_17113_20150611_050102_inLine +BABEL_OP3_404_17113_20150611_050102_outLine +BABEL_OP3_404_17280_20141103_190330_inLine +BABEL_OP3_404_17280_20141103_190330_outLine +BABEL_OP3_404_17615_20141201_025917_inLine +BABEL_OP3_404_17615_20141201_025917_outLine +BABEL_OP3_404_19134_20141120_053128_inLine +BABEL_OP3_404_19134_20141120_053128_outLine +BABEL_OP3_404_19703_20141027_004315_inLine +BABEL_OP3_404_19703_20141027_004315_outLine +BABEL_OP3_404_20133_20141010_195231_inLine +BABEL_OP3_404_20133_20141010_195231_outLine +BABEL_OP3_404_20985_20141126_183236_inLine +BABEL_OP3_404_20985_20141126_183236_outLine +BABEL_OP3_404_21004_20141201_035831_inLine +BABEL_OP3_404_21004_20141201_035831_outLine +BABEL_OP3_404_22280_20141111_020522_inLine +BABEL_OP3_404_22280_20141111_020522_outLine +BABEL_OP3_404_23046_20141031_030755_inLine +BABEL_OP3_404_23046_20141031_030755_outLine +BABEL_OP3_404_23505_20141021_032033_inLine +BABEL_OP3_404_23505_20141021_032033_outLine +BABEL_OP3_404_23731_20141130_033602_inLine +BABEL_OP3_404_23731_20141130_033602_outLine +BABEL_OP3_404_23980_20141106_225951_inLine +BABEL_OP3_404_23980_20141106_225951_outLine +BABEL_OP3_404_24270_20141111_012902_inLine +BABEL_OP3_404_24270_20141111_012902_outLine +BABEL_OP3_404_24470_20141111_184651_inLine +BABEL_OP3_404_24470_20141111_184651_outLine +BABEL_OP3_404_24470_20141111_190229_inLine +BABEL_OP3_404_24470_20141111_190229_outLine +BABEL_OP3_404_24532_20141007_211325_inLine +BABEL_OP3_404_24532_20141007_211325_outLine +BABEL_OP3_404_24589_20141031_020641_inLine +BABEL_OP3_404_24589_20141031_020641_outLine +BABEL_OP3_404_24679_20141018_015615_inLine +BABEL_OP3_404_24679_20141018_015615_outLine +BABEL_OP3_404_24982_20141102_021352_inLine +BABEL_OP3_404_24982_20141102_021352_outLine +BABEL_OP3_404_26388_20141026_014207_inLine +BABEL_OP3_404_26388_20141026_014207_outLine +BABEL_OP3_404_27042_20141201_215107_inLine +BABEL_OP3_404_27042_20141201_215107_outLine +BABEL_OP3_404_28303_20141028_182204_inLine +BABEL_OP3_404_28303_20141028_182204_outLine +BABEL_OP3_404_28522_20141124_222758_inLine +BABEL_OP3_404_28522_20141124_222758_outLine 
+BABEL_OP3_404_28538_20141119_005526_inLine +BABEL_OP3_404_28538_20141119_005526_outLine +BABEL_OP3_404_28871_20141019_181913_inLine +BABEL_OP3_404_28871_20141019_181913_outLine +BABEL_OP3_404_29039_20141128_035839_inLine +BABEL_OP3_404_29039_20141128_035839_outLine +BABEL_OP3_404_29208_20141106_013309_inLine +BABEL_OP3_404_29208_20141106_013309_outLine +BABEL_OP3_404_30098_20150610_150504_inLine +BABEL_OP3_404_30098_20150610_150504_outLine +BABEL_OP3_404_30432_20141126_052839_inLine +BABEL_OP3_404_30432_20141126_052839_outLine +BABEL_OP3_404_30461_20150620_020316_inLine +BABEL_OP3_404_30461_20150620_020316_outLine +BABEL_OP3_404_31624_20141105_214349_inLine +BABEL_OP3_404_31624_20141105_214349_outLine +BABEL_OP3_404_31979_20141106_000523_inLine +BABEL_OP3_404_31979_20141106_000523_outLine +BABEL_OP3_404_31992_20141014_221817_inLine +BABEL_OP3_404_31992_20141014_221817_outLine +BABEL_OP3_404_32122_20141115_022841_inLine +BABEL_OP3_404_32122_20141115_022841_outLine +BABEL_OP3_404_32287_20150210_060823_inLine +BABEL_OP3_404_32287_20150210_060823_outLine +BABEL_OP3_404_32708_20141106_032826_inLine +BABEL_OP3_404_32708_20141106_032826_outLine +BABEL_OP3_404_32727_20141128_203500_inLine +BABEL_OP3_404_32727_20141128_203500_outLine +BABEL_OP3_404_32727_20141128_204751_inLine +BABEL_OP3_404_32727_20141128_204751_outLine +BABEL_OP3_404_33355_20141019_032024_inLine +BABEL_OP3_404_33355_20141019_032024_outLine +BABEL_OP3_404_33355_20141019_034109_inLine +BABEL_OP3_404_33355_20141019_034109_outLine +BABEL_OP3_404_33704_20141207_073436_inLine +BABEL_OP3_404_33704_20141207_073436_outLine +BABEL_OP3_404_34679_20141102_052808_inLine +BABEL_OP3_404_34679_20141102_052808_outLine +BABEL_OP3_404_34688_20141009_073303_inLine +BABEL_OP3_404_34688_20141009_073303_outLine +BABEL_OP3_404_35143_20141130_181111_inLine +BABEL_OP3_404_35143_20141130_181111_outLine +BABEL_OP3_404_37064_20141102_063308_inLine +BABEL_OP3_404_37064_20141102_063308_outLine +BABEL_OP3_404_37281_20141119_053453_inLine +BABEL_OP3_404_37281_20141119_053453_outLine +BABEL_OP3_404_37598_20141119_045926_inLine +BABEL_OP3_404_37598_20141119_045926_outLine +BABEL_OP3_404_37682_20141101_221445_inLine +BABEL_OP3_404_37682_20141101_221445_outLine +BABEL_OP3_404_37853_20150602_030625_inLine +BABEL_OP3_404_37853_20150602_030625_outLine +BABEL_OP3_404_38588_20141118_163844_inLine +BABEL_OP3_404_38588_20141118_163844_outLine +BABEL_OP3_404_40557_20141127_200639_inLine +BABEL_OP3_404_40557_20141127_200639_outLine +BABEL_OP3_404_40713_20141028_221207_inLine +BABEL_OP3_404_40713_20141028_221207_outLine +BABEL_OP3_404_40939_20150210_212748_inLine +BABEL_OP3_404_40939_20150210_212748_outLine +BABEL_OP3_404_41100_20141021_022126_inLine +BABEL_OP3_404_41100_20141021_022126_outLine +BABEL_OP3_404_41609_20141009_013405_inLine +BABEL_OP3_404_41609_20141009_013405_outLine +BABEL_OP3_404_41680_20141012_040411_inLine +BABEL_OP3_404_41680_20141012_040411_outLine +BABEL_OP3_404_41920_20141008_040539_inLine +BABEL_OP3_404_41920_20141008_040539_outLine +BABEL_OP3_404_41958_20141029_212755_inLine +BABEL_OP3_404_41958_20141029_212755_outLine +BABEL_OP3_404_42877_20150212_052937_inLine +BABEL_OP3_404_42877_20150212_052937_outLine +BABEL_OP3_404_43368_20141031_010629_inLine +BABEL_OP3_404_43368_20141031_010629_outLine +BABEL_OP3_404_44114_20150614_012319_inLine +BABEL_OP3_404_44114_20150614_012319_outLine +BABEL_OP3_404_44477_20141201_180604_inLine +BABEL_OP3_404_44477_20141201_180604_outLine +BABEL_OP3_404_44847_20141130_221248_inLine 
+BABEL_OP3_404_44847_20141130_221248_outLine +BABEL_OP3_404_45121_20150609_055234_inLine +BABEL_OP3_404_45121_20150609_055234_outLine +BABEL_OP3_404_45560_20141012_030417_inLine +BABEL_OP3_404_45560_20141012_030417_outLine +BABEL_OP3_404_46169_20141130_224339_inLine +BABEL_OP3_404_46169_20141130_224339_outLine +BABEL_OP3_404_46268_20141019_032022_inLine +BABEL_OP3_404_46268_20141019_032022_outLine +BABEL_OP3_404_46550_20141105_072519_inLine +BABEL_OP3_404_46550_20141105_072519_outLine +BABEL_OP3_404_46625_20141011_040505_inLine +BABEL_OP3_404_46625_20141011_040505_outLine +BABEL_OP3_404_46681_20141021_040451_inLine +BABEL_OP3_404_46681_20141021_040451_outLine +BABEL_OP3_404_46881_20141012_020055_inLine +BABEL_OP3_404_46881_20141012_020055_outLine +BABEL_OP3_404_46976_20141107_183806_inLine +BABEL_OP3_404_46976_20141107_183806_outLine +BABEL_OP3_404_47270_20150512_053415_inLine +BABEL_OP3_404_47270_20150512_053415_outLine +BABEL_OP3_404_47802_20141110_200430_inLine +BABEL_OP3_404_47802_20141110_200430_outLine +BABEL_OP3_404_48243_20141023_200903_inLine +BABEL_OP3_404_48243_20141023_200903_outLine +BABEL_OP3_404_48844_20141020_065414_inLine +BABEL_OP3_404_48844_20141020_065414_outLine +BABEL_OP3_404_49197_20141117_024730_inLine +BABEL_OP3_404_49197_20141117_024730_outLine +BABEL_OP3_404_49768_20141026_022902_inLine +BABEL_OP3_404_49768_20141026_022902_outLine +BABEL_OP3_404_49902_20141101_175534_inLine +BABEL_OP3_404_49902_20141101_175534_outLine +BABEL_OP3_404_49907_20141103_050534_inLine +BABEL_OP3_404_49907_20141103_050534_outLine +BABEL_OP3_404_50175_20141021_025726_inLine +BABEL_OP3_404_50175_20141021_025726_outLine +BABEL_OP3_404_50745_20150513_162805_inLine +BABEL_OP3_404_50745_20150513_162805_outLine +BABEL_OP3_404_51015_20141123_193824_inLine +BABEL_OP3_404_51015_20141123_193824_outLine +BABEL_OP3_404_52246_20141118_035022_inLine +BABEL_OP3_404_52246_20141118_035022_outLine +BABEL_OP3_404_52246_20141118_040850_inLine +BABEL_OP3_404_52246_20141118_040850_outLine +BABEL_OP3_404_52301_20141009_051739_inLine +BABEL_OP3_404_52301_20141009_051739_outLine +BABEL_OP3_404_52301_20141009_054049_inLine +BABEL_OP3_404_52301_20141009_054049_outLine +BABEL_OP3_404_52490_20141016_020323_inLine +BABEL_OP3_404_52490_20141016_020323_outLine +BABEL_OP3_404_52725_20150522_222730_inLine +BABEL_OP3_404_52725_20150522_222730_outLine +BABEL_OP3_404_54104_20141008_214620_inLine +BABEL_OP3_404_54104_20141008_214620_outLine +BABEL_OP3_404_54160_20141009_180704_inLine +BABEL_OP3_404_54160_20141009_180704_outLine +BABEL_OP3_404_54160_20141009_184719_inLine +BABEL_OP3_404_54160_20141009_184719_outLine +BABEL_OP3_404_54160_20141009_185557_inLine +BABEL_OP3_404_54160_20141009_185557_outLine +BABEL_OP3_404_54405_20141117_054820_inLine +BABEL_OP3_404_54405_20141117_054820_outLine +BABEL_OP3_404_54744_20141015_012011_inLine +BABEL_OP3_404_54744_20141015_012011_outLine +BABEL_OP3_404_55259_20141029_225631_inLine +BABEL_OP3_404_55259_20141029_225631_outLine +BABEL_OP3_404_56213_20141201_000837_inLine +BABEL_OP3_404_56213_20141201_000837_outLine +BABEL_OP3_404_57654_20141023_235628_inLine +BABEL_OP3_404_57654_20141023_235628_outLine +BABEL_OP3_404_57678_20141104_023128_inLine +BABEL_OP3_404_57678_20141104_023128_outLine +BABEL_OP3_404_57919_20150127_041057_inLine +BABEL_OP3_404_57919_20150127_041057_outLine +BABEL_OP3_404_58103_20141030_002209_inLine +BABEL_OP3_404_58103_20141030_002209_outLine +BABEL_OP3_404_59078_20141111_004941_inLine +BABEL_OP3_404_59078_20141111_004941_outLine 
+BABEL_OP3_404_59262_20141130_212633_inLine +BABEL_OP3_404_59262_20141130_212633_outLine +BABEL_OP3_404_59720_20141029_204612_inLine +BABEL_OP3_404_59720_20141029_204612_outLine +BABEL_OP3_404_60026_20141008_051633_inLine +BABEL_OP3_404_60026_20141008_051633_outLine +BABEL_OP3_404_60474_20141029_182816_inLine +BABEL_OP3_404_60474_20141029_182816_outLine +BABEL_OP3_404_60626_20141028_212539_inLine +BABEL_OP3_404_60626_20141028_212539_outLine +BABEL_OP3_404_61167_20141030_222711_inLine +BABEL_OP3_404_61167_20141030_222711_outLine +BABEL_OP3_404_61219_20141025_193634_inLine +BABEL_OP3_404_61219_20141025_193634_outLine +BABEL_OP3_404_61225_20141009_174003_inLine +BABEL_OP3_404_61225_20141009_174003_outLine +BABEL_OP3_404_61678_20141019_201928_inLine +BABEL_OP3_404_61678_20141019_201928_outLine +BABEL_OP3_404_61873_20141108_214852_inLine +BABEL_OP3_404_61873_20141108_214852_outLine +BABEL_OP3_404_62155_20150522_032307_inLine +BABEL_OP3_404_62155_20150522_032307_outLine +BABEL_OP3_404_62286_20141105_204359_inLine +BABEL_OP3_404_62286_20141105_204359_outLine +BABEL_OP3_404_62456_20141108_202333_inLine +BABEL_OP3_404_62456_20141108_202333_outLine +BABEL_OP3_404_62714_20150522_011337_inLine +BABEL_OP3_404_62714_20150522_011337_outLine +BABEL_OP3_404_62734_20141029_221513_inLine +BABEL_OP3_404_62734_20141029_221513_outLine +BABEL_OP3_404_63081_20141021_032233_inLine +BABEL_OP3_404_63081_20141021_032233_outLine +BABEL_OP3_404_63081_20141021_033457_inLine +BABEL_OP3_404_63081_20141021_033457_outLine +BABEL_OP3_404_63084_20141130_221452_inLine +BABEL_OP3_404_63084_20141130_221452_outLine +BABEL_OP3_404_63220_20141127_033605_inLine +BABEL_OP3_404_63220_20141127_033605_outLine +BABEL_OP3_404_63757_20141111_180721_inLine +BABEL_OP3_404_63757_20141111_180721_outLine +BABEL_OP3_404_64494_20141026_203549_inLine +BABEL_OP3_404_64494_20141026_203549_outLine +BABEL_OP3_404_64768_20141027_201818_inLine +BABEL_OP3_404_64768_20141027_201818_outLine +BABEL_OP3_404_64870_20141108_192546_inLine +BABEL_OP3_404_64870_20141108_192546_outLine +BABEL_OP3_404_66045_20141117_035937_inLine +BABEL_OP3_404_66045_20141117_035937_outLine +BABEL_OP3_404_66177_20150503_202932_inLine +BABEL_OP3_404_66177_20150503_202932_outLine +BABEL_OP3_404_66822_20141117_020953_inLine +BABEL_OP3_404_66822_20141117_020953_outLine +BABEL_OP3_404_66916_20141022_000731_inLine +BABEL_OP3_404_66916_20141022_000731_outLine +BABEL_OP3_404_67401_20141109_211809_inLine +BABEL_OP3_404_67401_20141109_211809_outLine +BABEL_OP3_404_67842_20141104_051753_inLine +BABEL_OP3_404_67842_20141104_051753_outLine +BABEL_OP3_404_68059_20141109_052011_inLine +BABEL_OP3_404_68059_20141109_052011_outLine +BABEL_OP3_404_68068_20141201_054518_inLine +BABEL_OP3_404_68068_20141201_054518_outLine +BABEL_OP3_404_68244_20141119_065540_inLine +BABEL_OP3_404_68244_20141119_065540_outLine +BABEL_OP3_404_68384_20141130_035214_inLine +BABEL_OP3_404_68384_20141130_035214_outLine +BABEL_OP3_404_68385_20141017_031005_inLine +BABEL_OP3_404_68385_20141017_031005_outLine +BABEL_OP3_404_68627_20141105_190511_inLine +BABEL_OP3_404_68627_20141105_190511_outLine +BABEL_OP3_404_68823_20150212_041147_inLine +BABEL_OP3_404_68823_20150212_041147_outLine +BABEL_OP3_404_69107_20141120_010459_inLine +BABEL_OP3_404_69107_20141120_010459_outLine +BABEL_OP3_404_69574_20141006_023156_inLine +BABEL_OP3_404_69574_20141006_023156_outLine +BABEL_OP3_404_69578_20141117_003921_inLine +BABEL_OP3_404_69578_20141117_003921_outLine +BABEL_OP3_404_70121_20141104_202610_inLine 
+BABEL_OP3_404_70121_20141104_202610_outLine +BABEL_OP3_404_70282_20141111_000251_inLine +BABEL_OP3_404_70282_20141111_000251_outLine +BABEL_OP3_404_70794_20141021_185105_inLine +BABEL_OP3_404_70794_20141021_185105_outLine +BABEL_OP3_404_71263_20141119_234747_inLine +BABEL_OP3_404_71263_20141119_234747_outLine +BABEL_OP3_404_71401_20150206_070446_inLine +BABEL_OP3_404_71401_20150206_070446_outLine +BABEL_OP3_404_71404_20141023_215509_inLine +BABEL_OP3_404_71404_20141023_215509_outLine +BABEL_OP3_404_71566_20141130_035713_inLine +BABEL_OP3_404_71566_20141130_035713_outLine +BABEL_OP3_404_71566_20141130_040359_inLine +BABEL_OP3_404_71566_20141130_040359_outLine +BABEL_OP3_404_72844_20141007_033837_inLine +BABEL_OP3_404_72844_20141007_033837_outLine +BABEL_OP3_404_73119_20141026_232203_inLine +BABEL_OP3_404_73119_20141026_232203_outLine +BABEL_OP3_404_73485_20150512_234636_inLine +BABEL_OP3_404_73485_20150512_234636_outLine +BABEL_OP3_404_73837_20141026_191037_inLine +BABEL_OP3_404_73837_20141026_191037_outLine +BABEL_OP3_404_74641_20141108_223951_inLine +BABEL_OP3_404_74641_20141108_223951_outLine +BABEL_OP3_404_74799_20141109_222638_inLine +BABEL_OP3_404_74799_20141109_222638_outLine +BABEL_OP3_404_75869_20150527_230650_inLine +BABEL_OP3_404_75869_20150527_230650_outLine +BABEL_OP3_404_76437_20141019_202715_inLine +BABEL_OP3_404_76437_20141019_202715_outLine +BABEL_OP3_404_77126_20141022_202348_inLine +BABEL_OP3_404_77126_20141022_202348_outLine +BABEL_OP3_404_77391_20141026_222314_inLine +BABEL_OP3_404_77391_20141026_222314_outLine +BABEL_OP3_404_77427_20141030_192713_inLine +BABEL_OP3_404_77427_20141030_192713_outLine +BABEL_OP3_404_77730_20141014_201059_inLine +BABEL_OP3_404_77730_20141014_201059_outLine +BABEL_OP3_404_77990_20141024_215822_inLine +BABEL_OP3_404_77990_20141024_215822_outLine +BABEL_OP3_404_78016_20141029_233059_inLine +BABEL_OP3_404_78016_20141029_233059_outLine +BABEL_OP3_404_78254_20141025_202742_inLine +BABEL_OP3_404_78254_20141025_202742_outLine +BABEL_OP3_404_78254_20141025_204922_inLine +BABEL_OP3_404_78254_20141025_204922_outLine +BABEL_OP3_404_78511_20141201_003606_inLine +BABEL_OP3_404_78511_20141201_003606_outLine +BABEL_OP3_404_78976_20141025_183704_inLine +BABEL_OP3_404_78976_20141025_183704_outLine +BABEL_OP3_404_79139_20141117_054733_inLine +BABEL_OP3_404_79139_20141117_054733_outLine +BABEL_OP3_404_79751_20141101_232250_inLine +BABEL_OP3_404_79751_20141101_232250_outLine +BABEL_OP3_404_80439_20141026_005410_inLine +BABEL_OP3_404_80439_20141026_005410_outLine +BABEL_OP3_404_81213_20141102_205052_inLine +BABEL_OP3_404_81213_20141102_205052_outLine +BABEL_OP3_404_81229_20141117_041745_inLine +BABEL_OP3_404_81229_20141117_041745_outLine +BABEL_OP3_404_81971_20141022_025641_inLine +BABEL_OP3_404_81971_20141022_025641_outLine +BABEL_OP3_404_82089_20141117_045302_inLine +BABEL_OP3_404_82089_20141117_045302_outLine +BABEL_OP3_404_82303_20150614_024236_inLine +BABEL_OP3_404_82303_20150614_024236_outLine +BABEL_OP3_404_82473_20141026_060037_inLine +BABEL_OP3_404_82473_20141026_060037_outLine +BABEL_OP3_404_82637_20141021_010105_inLine +BABEL_OP3_404_82637_20141021_010105_outLine +BABEL_OP3_404_82742_20141201_234306_inLine +BABEL_OP3_404_82742_20141201_234306_outLine +BABEL_OP3_404_83062_20150523_220236_inLine +BABEL_OP3_404_83062_20150523_220236_outLine +BABEL_OP3_404_83238_20141119_180953_inLine +BABEL_OP3_404_83238_20141119_180953_outLine +BABEL_OP3_404_83366_20141120_192208_inLine +BABEL_OP3_404_83366_20141120_192208_outLine 
+BABEL_OP3_404_83775_20141030_230742_inLine +BABEL_OP3_404_83775_20141030_230742_outLine +BABEL_OP3_404_83851_20141028_203735_inLine +BABEL_OP3_404_83851_20141028_203735_outLine +BABEL_OP3_404_83929_20141018_184023_inLine +BABEL_OP3_404_83929_20141018_184023_outLine +BABEL_OP3_404_84055_20150504_002015_inLine +BABEL_OP3_404_84055_20150504_002015_outLine +BABEL_OP3_404_84061_20141030_205021_inLine +BABEL_OP3_404_84061_20141030_205021_outLine +BABEL_OP3_404_84339_20150502_014143_inLine +BABEL_OP3_404_84339_20150502_014143_outLine +BABEL_OP3_404_85048_20141127_023704_inLine +BABEL_OP3_404_85048_20141127_023704_outLine +BABEL_OP3_404_85254_20150620_035606_inLine +BABEL_OP3_404_85254_20150620_035606_outLine +BABEL_OP3_404_85322_20141008_235518_inLine +BABEL_OP3_404_85322_20141008_235518_outLine +BABEL_OP3_404_85651_20141211_032650_inLine +BABEL_OP3_404_85651_20141211_032650_outLine +BABEL_OP3_404_86191_20141027_013544_inLine +BABEL_OP3_404_86191_20141027_013544_outLine +BABEL_OP3_404_86472_20141201_011325_inLine +BABEL_OP3_404_86472_20141201_011325_outLine +BABEL_OP3_404_86635_20141127_204158_inLine +BABEL_OP3_404_86635_20141127_204158_outLine +BABEL_OP3_404_86722_20141029_192140_inLine +BABEL_OP3_404_86722_20141029_192140_outLine +BABEL_OP3_404_86888_20141119_022459_inLine +BABEL_OP3_404_86888_20141119_022459_outLine +BABEL_OP3_404_87470_20141114_214639_inLine +BABEL_OP3_404_87470_20141114_214639_outLine +BABEL_OP3_404_87629_20141127_020403_inLine +BABEL_OP3_404_87629_20141127_020403_outLine +BABEL_OP3_404_88260_20141103_234824_inLine +BABEL_OP3_404_88260_20141103_234824_outLine +BABEL_OP3_404_88445_20141119_043713_inLine +BABEL_OP3_404_88445_20141119_043713_outLine +BABEL_OP3_404_88661_20141127_025208_inLine +BABEL_OP3_404_88661_20141127_025208_outLine +BABEL_OP3_404_88669_20141119_000147_inLine +BABEL_OP3_404_88669_20141119_000147_outLine +BABEL_OP3_404_88783_20141201_045305_inLine +BABEL_OP3_404_88783_20141201_045305_outLine +BABEL_OP3_404_89045_20141022_193202_inLine +BABEL_OP3_404_89045_20141022_193202_outLine +BABEL_OP3_404_89372_20141010_000950_inLine +BABEL_OP3_404_89372_20141010_000950_outLine +BABEL_OP3_404_89650_20150220_222402_inLine +BABEL_OP3_404_89650_20150220_222402_outLine +BABEL_OP3_404_89650_20150220_224606_inLine +BABEL_OP3_404_89650_20150220_224606_outLine +BABEL_OP3_404_89665_20141103_202723_inLine +BABEL_OP3_404_89665_20141103_202723_outLine +BABEL_OP3_404_90930_20150119_021352_inLine +BABEL_OP3_404_90930_20150119_021352_outLine +BABEL_OP3_404_91463_20141116_023036_inLine +BABEL_OP3_404_91463_20141116_023036_outLine +BABEL_OP3_404_91825_20141009_181224_inLine +BABEL_OP3_404_91825_20141009_181224_outLine +BABEL_OP3_404_91825_20141009_183843_inLine +BABEL_OP3_404_91825_20141009_183843_outLine +BABEL_OP3_404_91971_20150217_041455_inLine +BABEL_OP3_404_91971_20150217_041455_outLine +BABEL_OP3_404_92698_20141117_072302_inLine +BABEL_OP3_404_92698_20141117_072302_outLine +BABEL_OP3_404_92736_20141201_011442_inLine +BABEL_OP3_404_92736_20141201_011442_outLine +BABEL_OP3_404_94025_20141129_180207_inLine +BABEL_OP3_404_94025_20141129_180207_outLine +BABEL_OP3_404_94869_20141007_194254_inLine +BABEL_OP3_404_94869_20141007_194254_outLine +BABEL_OP3_404_95966_20141129_060246_inLine +BABEL_OP3_404_95966_20141129_060246_outLine +BABEL_OP3_404_96376_20150503_033706_inLine +BABEL_OP3_404_96376_20150503_033706_outLine +BABEL_OP3_404_96504_20141103_031329_inLine +BABEL_OP3_404_96504_20141103_031329_outLine +BABEL_OP3_404_97461_20141118_230730_inLine 
+BABEL_OP3_404_97461_20141118_230730_outLine +BABEL_OP3_404_97557_20141119_230718_inLine +BABEL_OP3_404_97557_20141119_230718_outLine +BABEL_OP3_404_97588_20141018_234016_inLine +BABEL_OP3_404_97588_20141018_234016_outLine +BABEL_OP3_404_97588_20141018_235425_inLine +BABEL_OP3_404_97588_20141018_235425_outLine +BABEL_OP3_404_97896_20141116_221329_inLine +BABEL_OP3_404_97896_20141116_221329_outLine +BABEL_OP3_404_97988_20141201_030306_inLine +BABEL_OP3_404_97988_20141201_030306_outLine +BABEL_OP3_404_98888_20141113_212715_inLine +BABEL_OP3_404_98888_20141113_212715_outLine +BABEL_OP3_404_99202_20141108_210814_inLine +BABEL_OP3_404_99202_20141108_210814_outLine +BABEL_OP3_404_99487_20141021_053024_inLine +BABEL_OP3_404_99487_20141021_053024_outLine +BABEL_OP3_404_99594_20141105_194545_inLine +BABEL_OP3_404_99594_20141105_194545_outLine +BABEL_OP3_404_99813_20141120_025129_inLine +BABEL_OP3_404_99813_20141120_025129_outLine diff --git a/egs/babel/s5d/conf/lists/404-georgian/untranscribed-training.list b/egs/babel/s5d/conf/lists/404-georgian/untranscribed-training.list new file mode 100644 index 00000000000..8d6682cc789 --- /dev/null +++ b/egs/babel/s5d/conf/lists/404-georgian/untranscribed-training.list @@ -0,0 +1,535 @@ +BABEL_OP3_404_10058_20150526_034808_inLine +BABEL_OP3_404_10411_20150611_172027_inLine +BABEL_OP3_404_10411_20150611_172027_outLine +BABEL_OP3_404_10938_20141030_023413_inLine +BABEL_OP3_404_10938_20141030_023413_outLine +BABEL_OP3_404_11352_20150513_002642_inLine +BABEL_OP3_404_11352_20150513_002642_outLine +BABEL_OP3_404_11859_20150611_041737_inLine +BABEL_OP3_404_11859_20150611_041737_outLine +BABEL_OP3_404_12220_20141116_205911_inLine +BABEL_OP3_404_12220_20141116_205911_outLine +BABEL_OP3_404_12609_20150524_172934_inLine +BABEL_OP3_404_12609_20150524_172934_outLine +BABEL_OP3_404_13126_20150524_221540_inLine +BABEL_OP3_404_13126_20150524_221540_outLine +BABEL_OP3_404_14158_20141130_030130_inLine +BABEL_OP3_404_14158_20141130_030130_outLine +BABEL_OP3_404_15024_20141118_234824_inLine +BABEL_OP3_404_15024_20141118_234824_outLine +BABEL_OP3_404_15042_20150506_232829_inLine +BABEL_OP3_404_15042_20150506_232829_outLine +BABEL_OP3_404_15535_20141129_021659_inLine +BABEL_OP3_404_15535_20141129_021659_outLine +BABEL_OP3_404_15638_20141127_220502_outLine +BABEL_OP3_404_15902_20141020_173105_outLine +BABEL_OP3_404_16475_20141116_052010_outLine +BABEL_OP3_404_16601_20141201_041704_inLine +BABEL_OP3_404_16601_20141201_041704_outLine +BABEL_OP3_404_17320_20150524_213213_inLine +BABEL_OP3_404_17320_20150524_213213_outLine +BABEL_OP3_404_17420_20150503_201902_inLine +BABEL_OP3_404_17420_20150503_201902_outLine +BABEL_OP3_404_17420_20150527_025815_inLine +BABEL_OP3_404_17420_20150527_025815_outLine +BABEL_OP3_404_17420_20150527_034621_inLine +BABEL_OP3_404_17420_20150527_034621_outLine +BABEL_OP3_404_17520_20141113_032534_inLine +BABEL_OP3_404_17567_20141117_182919_inLine +BABEL_OP3_404_17567_20141117_182919_outLine +BABEL_OP3_404_17573_20141129_035040_inLine +BABEL_OP3_404_17573_20141129_035040_outLine +BABEL_OP3_404_17890_20141128_040046_inLine +BABEL_OP3_404_17890_20141128_040046_outLine +BABEL_OP3_404_17923_20141022_231429_outLine +BABEL_OP3_404_18118_20150503_165936_inLine +BABEL_OP3_404_18118_20150503_165936_outLine +BABEL_OP3_404_18291_20150611_062705_outLine +BABEL_OP3_404_18291_20150611_063700_outLine +BABEL_OP3_404_18766_20150610_064349_inLine +BABEL_OP3_404_19120_20150525_014657_inLine +BABEL_OP3_404_19120_20150525_014657_outLine +BABEL_OP3_404_19120_20150525_015635_inLine 
+BABEL_OP3_404_19120_20150525_015635_outLine +BABEL_OP3_404_19877_20150506_202237_outLine +BABEL_OP3_404_20454_20150218_171143_inLine +BABEL_OP3_404_20454_20150218_171143_outLine +BABEL_OP3_404_21159_20150615_021612_inLine +BABEL_OP3_404_21435_20150523_030702_inLine +BABEL_OP3_404_21435_20150523_030702_outLine +BABEL_OP3_404_21581_20141101_011021_inLine +BABEL_OP3_404_21581_20141101_011021_outLine +BABEL_OP3_404_21807_20141112_225225_outLine +BABEL_OP3_404_22591_20150217_220714_inLine +BABEL_OP3_404_24209_20150212_224614_inLine +BABEL_OP3_404_24239_20150517_203015_inLine +BABEL_OP3_404_24323_20141117_020615_outLine +BABEL_OP3_404_24501_20150522_030231_inLine +BABEL_OP3_404_24586_20150524_190657_inLine +BABEL_OP3_404_24586_20150524_190657_outLine +BABEL_OP3_404_24590_20141116_230233_inLine +BABEL_OP3_404_24590_20141116_230233_outLine +BABEL_OP3_404_25068_20150206_022730_outLine +BABEL_OP3_404_25085_20150611_040906_inLine +BABEL_OP3_404_25085_20150611_040906_outLine +BABEL_OP3_404_25412_20141120_031532_inLine +BABEL_OP3_404_25412_20141120_031532_outLine +BABEL_OP3_404_25496_20150613_034126_inLine +BABEL_OP3_404_25496_20150613_034126_outLine +BABEL_OP3_404_26398_20150527_032152_inLine +BABEL_OP3_404_26398_20150527_032152_outLine +BABEL_OP3_404_26478_20150617_004029_inLine +BABEL_OP3_404_26478_20150617_004029_outLine +BABEL_OP3_404_26836_20141102_024528_inLine +BABEL_OP3_404_26836_20141102_024528_outLine +BABEL_OP3_404_27203_20141119_185720_inLine +BABEL_OP3_404_27203_20141119_185720_outLine +BABEL_OP3_404_27203_20141119_191138_inLine +BABEL_OP3_404_27203_20141119_191138_outLine +BABEL_OP3_404_27590_20141128_051454_inLine +BABEL_OP3_404_28280_20150619_024509_inLine +BABEL_OP3_404_28280_20150619_024509_outLine +BABEL_OP3_404_28280_20150619_025848_inLine +BABEL_OP3_404_28280_20150619_025848_outLine +BABEL_OP3_404_28600_20141201_223206_inLine +BABEL_OP3_404_28600_20141201_223206_outLine +BABEL_OP3_404_28945_20141104_060349_outLine +BABEL_OP3_404_29076_20141109_215142_inLine +BABEL_OP3_404_29076_20141109_215142_outLine +BABEL_OP3_404_29230_20150611_051340_inLine +BABEL_OP3_404_29230_20150611_051340_outLine +BABEL_OP3_404_29439_20150524_201524_inLine +BABEL_OP3_404_29439_20150524_201524_outLine +BABEL_OP3_404_30497_20150525_194737_inLine +BABEL_OP3_404_30497_20150525_194737_outLine +BABEL_OP3_404_30645_20141019_220859_inLine +BABEL_OP3_404_30653_20150514_014515_inLine +BABEL_OP3_404_31267_20150615_011004_outLine +BABEL_OP3_404_31484_20141122_232804_inLine +BABEL_OP3_404_31484_20141122_232804_outLine +BABEL_OP3_404_31919_20150526_220911_inLine +BABEL_OP3_404_31919_20150526_220911_outLine +BABEL_OP3_404_32630_20150609_012137_inLine +BABEL_OP3_404_32630_20150609_012137_outLine +BABEL_OP3_404_32959_20141201_005331_inLine +BABEL_OP3_404_32959_20141201_005331_outLine +BABEL_OP3_404_32998_20141112_054111_inLine +BABEL_OP3_404_34328_20141119_054513_outLine +BABEL_OP3_404_34328_20141119_055432_outLine +BABEL_OP3_404_34811_20141109_001009_inLine +BABEL_OP3_404_34811_20141109_001009_outLine +BABEL_OP3_404_34899_20150611_060602_outLine +BABEL_OP3_404_35008_20141201_023042_inLine +BABEL_OP3_404_35008_20141201_023042_outLine +BABEL_OP3_404_35181_20150526_211416_inLine +BABEL_OP3_404_35181_20150526_211416_outLine +BABEL_OP3_404_35706_20150523_015900_inLine +BABEL_OP3_404_35706_20150523_015900_outLine +BABEL_OP3_404_35786_20150604_015518_inLine +BABEL_OP3_404_35786_20150604_015518_outLine +BABEL_OP3_404_36017_20150528_192934_inLine +BABEL_OP3_404_36017_20150528_192934_outLine 
+BABEL_OP3_404_36039_20150526_230125_inLine +BABEL_OP3_404_36039_20150526_230125_outLine +BABEL_OP3_404_36059_20150601_023254_inLine +BABEL_OP3_404_36059_20150601_023254_outLine +BABEL_OP3_404_36059_20150601_033346_inLine +BABEL_OP3_404_36059_20150601_033346_outLine +BABEL_OP3_404_36147_20150211_013803_outLine +BABEL_OP3_404_36219_20141104_012216_inLine +BABEL_OP3_404_36219_20141104_012216_outLine +BABEL_OP3_404_36642_20150610_161207_inLine +BABEL_OP3_404_36642_20150610_161207_outLine +BABEL_OP3_404_37290_20141115_050457_inLine +BABEL_OP3_404_37290_20141115_050457_outLine +BABEL_OP3_404_38125_20150526_233108_inLine +BABEL_OP3_404_38125_20150526_233108_outLine +BABEL_OP3_404_38323_20150615_021843_inLine +BABEL_OP3_404_38340_20141103_231545_inLine +BABEL_OP3_404_38340_20141103_231545_outLine +BABEL_OP3_404_38554_20141010_224451_inLine +BABEL_OP3_404_38554_20141010_224451_outLine +BABEL_OP3_404_38664_20141030_175135_inLine +BABEL_OP3_404_38664_20141030_175135_outLine +BABEL_OP3_404_38979_20150503_202406_outLine +BABEL_OP3_404_39099_20150511_053646_outLine +BABEL_OP3_404_39307_20141022_200554_inLine +BABEL_OP3_404_39307_20141022_201758_inLine +BABEL_OP3_404_39426_20150527_181901_outLine +BABEL_OP3_404_39744_20141023_002710_inLine +BABEL_OP3_404_39893_20150611_034149_inLine +BABEL_OP3_404_39920_20150503_205354_outLine +BABEL_OP3_404_41097_20141129_055801_inLine +BABEL_OP3_404_41097_20141129_055801_outLine +BABEL_OP3_404_41272_20150503_232941_inLine +BABEL_OP3_404_41334_20150617_041322_inLine +BABEL_OP3_404_41400_20150515_021408_inLine +BABEL_OP3_404_41692_20150604_005657_inLine +BABEL_OP3_404_41692_20150604_005657_outLine +BABEL_OP3_404_41745_20141114_235452_inLine +BABEL_OP3_404_41745_20141114_235452_outLine +BABEL_OP3_404_42155_20141127_055149_inLine +BABEL_OP3_404_42619_20141130_012456_outLine +BABEL_OP3_404_42834_20141125_004837_inLine +BABEL_OP3_404_42834_20141125_004837_outLine +BABEL_OP3_404_42883_20150604_035732_inLine +BABEL_OP3_404_42883_20150604_035732_outLine +BABEL_OP3_404_43388_20141114_212210_inLine +BABEL_OP3_404_43388_20141114_214120_inLine +BABEL_OP3_404_43588_20150517_233637_inLine +BABEL_OP3_404_43789_20141120_011327_outLine +BABEL_OP3_404_44309_20150525_022635_inLine +BABEL_OP3_404_44309_20150525_022635_outLine +BABEL_OP3_404_44478_20150512_225118_inLine +BABEL_OP3_404_45106_20141119_050859_inLine +BABEL_OP3_404_45106_20141119_050859_outLine +BABEL_OP3_404_45374_20150122_014830_outLine +BABEL_OP3_404_45374_20150122_015920_outLine +BABEL_OP3_404_45459_20150525_020410_inLine +BABEL_OP3_404_45459_20150525_020410_outLine +BABEL_OP3_404_45699_20150205_021829_inLine +BABEL_OP3_404_45851_20150514_155157_inLine +BABEL_OP3_404_45851_20150514_155157_outLine +BABEL_OP3_404_45908_20150515_004218_outLine +BABEL_OP3_404_46310_20141015_051100_inLine +BABEL_OP3_404_46310_20141015_051100_outLine +BABEL_OP3_404_46315_20141129_012912_inLine +BABEL_OP3_404_46315_20141129_012912_outLine +BABEL_OP3_404_46688_20141015_211329_inLine +BABEL_OP3_404_46688_20141015_211329_outLine +BABEL_OP3_404_46712_20141027_224004_inLine +BABEL_OP3_404_46712_20141027_224004_outLine +BABEL_OP3_404_46974_20141128_055136_inLine +BABEL_OP3_404_46974_20141128_055136_outLine +BABEL_OP3_404_47156_20150625_025324_inLine +BABEL_OP3_404_47156_20150625_025324_outLine +BABEL_OP3_404_47823_20141201_044425_inLine +BABEL_OP3_404_47823_20141201_044425_outLine +BABEL_OP3_404_48016_20150615_000741_inLine +BABEL_OP3_404_48016_20150615_000741_outLine +BABEL_OP3_404_48610_20141013_011505_inLine 
+BABEL_OP3_404_48610_20141013_012904_inLine +BABEL_OP3_404_48663_20150512_202837_inLine +BABEL_OP3_404_48663_20150512_202837_outLine +BABEL_OP3_404_49306_20150524_003356_inLine +BABEL_OP3_404_49306_20150524_003356_outLine +BABEL_OP3_404_49630_20141128_020114_inLine +BABEL_OP3_404_49630_20141128_020114_outLine +BABEL_OP3_404_49767_20150613_050113_inLine +BABEL_OP3_404_49767_20150613_050113_outLine +BABEL_OP3_404_49775_20141011_005306_inLine +BABEL_OP3_404_49775_20141011_005306_outLine +BABEL_OP3_404_49945_20150610_154709_inLine +BABEL_OP3_404_50601_20141127_032527_inLine +BABEL_OP3_404_50601_20141127_032527_outLine +BABEL_OP3_404_50779_20141115_012852_inLine +BABEL_OP3_404_50779_20141115_012852_outLine +BABEL_OP3_404_50810_20141007_234432_inLine +BABEL_OP3_404_50810_20141007_234432_outLine +BABEL_OP3_404_51414_20150604_001601_inLine +BABEL_OP3_404_51414_20150604_001601_outLine +BABEL_OP3_404_51484_20141202_000325_inLine +BABEL_OP3_404_51484_20141202_000325_outLine +BABEL_OP3_404_51701_20150620_010924_outLine +BABEL_OP3_404_52070_20150620_014422_outLine +BABEL_OP3_404_52070_20150620_020559_outLine +BABEL_OP3_404_52404_20141125_004855_inLine +BABEL_OP3_404_52404_20141125_004855_outLine +BABEL_OP3_404_53063_20141201_005237_inLine +BABEL_OP3_404_53063_20141201_005237_outLine +BABEL_OP3_404_53072_20150518_015132_inLine +BABEL_OP3_404_53415_20150503_225920_inLine +BABEL_OP3_404_53415_20150503_225920_outLine +BABEL_OP3_404_53492_20150525_055025_inLine +BABEL_OP3_404_53492_20150525_055025_outLine +BABEL_OP3_404_53665_20150526_004549_inLine +BABEL_OP3_404_53917_20150503_205456_outLine +BABEL_OP3_404_53957_20141201_051933_inLine +BABEL_OP3_404_54477_20141211_033627_inLine +BABEL_OP3_404_54477_20141211_033627_outLine +BABEL_OP3_404_55013_20150525_222257_inLine +BABEL_OP3_404_55013_20150525_222257_outLine +BABEL_OP3_404_55267_20141130_212756_inLine +BABEL_OP3_404_55349_20150523_031602_inLine +BABEL_OP3_404_55349_20150523_031602_outLine +BABEL_OP3_404_56019_20150502_020750_inLine +BABEL_OP3_404_56019_20150502_020750_outLine +BABEL_OP3_404_56076_20150516_164959_inLine +BABEL_OP3_404_56076_20150516_164959_outLine +BABEL_OP3_404_56331_20150526_020747_inLine +BABEL_OP3_404_56331_20150526_020747_outLine +BABEL_OP3_404_56743_20141114_223719_inLine +BABEL_OP3_404_56743_20141114_223719_outLine +BABEL_OP3_404_57065_20141201_002920_inLine +BABEL_OP3_404_57219_20150618_045613_inLine +BABEL_OP3_404_57219_20150618_045613_outLine +BABEL_OP3_404_57464_20150523_224617_inLine +BABEL_OP3_404_57542_20150526_233832_inLine +BABEL_OP3_404_57542_20150526_233832_outLine +BABEL_OP3_404_57542_20150526_235003_inLine +BABEL_OP3_404_57542_20150526_235003_outLine +BABEL_OP3_404_58006_20150526_024205_inLine +BABEL_OP3_404_58006_20150526_024205_outLine +BABEL_OP3_404_58026_20150615_004130_inLine +BABEL_OP3_404_58026_20150615_004130_outLine +BABEL_OP3_404_58915_20150611_034220_outLine +BABEL_OP3_404_59307_20150504_003405_inLine +BABEL_OP3_404_59307_20150504_003405_outLine +BABEL_OP3_404_59864_20150602_014458_inLine +BABEL_OP3_404_60299_20150611_040929_inLine +BABEL_OP3_404_60310_20141130_231532_inLine +BABEL_OP3_404_60310_20141130_231532_outLine +BABEL_OP3_404_60352_20141201_060712_inLine +BABEL_OP3_404_60352_20141201_060712_outLine +BABEL_OP3_404_60352_20141201_061821_inLine +BABEL_OP3_404_60352_20141201_061821_outLine +BABEL_OP3_404_60458_20150609_021527_inLine +BABEL_OP3_404_60458_20150609_021527_outLine +BABEL_OP3_404_60477_20150613_223056_inLine +BABEL_OP3_404_60477_20150613_224002_inLine 
+BABEL_OP3_404_60498_20150606_022221_inLine +BABEL_OP3_404_60498_20150606_022221_outLine +BABEL_OP3_404_60706_20141020_215729_inLine +BABEL_OP3_404_60706_20141020_215729_outLine +BABEL_OP3_404_61888_20150504_171019_inLine +BABEL_OP3_404_61971_20150525_020101_outLine +BABEL_OP3_404_62360_20150517_033230_inLine +BABEL_OP3_404_62360_20150517_033230_outLine +BABEL_OP3_404_62724_20141130_200827_inLine +BABEL_OP3_404_62724_20141130_200827_outLine +BABEL_OP3_404_62852_20141013_054854_outLine +BABEL_OP3_404_63425_20141126_054504_inLine +BABEL_OP3_404_63481_20141020_221014_outLine +BABEL_OP3_404_63481_20141020_224225_outLine +BABEL_OP3_404_63670_20141130_050318_inLine +BABEL_OP3_404_63670_20141130_050318_outLine +BABEL_OP3_404_63906_20150525_050310_inLine +BABEL_OP3_404_63906_20150525_050310_outLine +BABEL_OP3_404_63999_20150610_041309_inLine +BABEL_OP3_404_64014_20150503_032745_inLine +BABEL_OP3_404_64014_20150503_032745_outLine +BABEL_OP3_404_64722_20150514_034208_outLine +BABEL_OP3_404_64759_20141014_044027_inLine +BABEL_OP3_404_64759_20141014_045519_inLine +BABEL_OP3_404_64796_20141022_055826_inLine +BABEL_OP3_404_65561_20141124_060558_inLine +BABEL_OP3_404_65561_20141124_060558_outLine +BABEL_OP3_404_65640_20150528_211835_inLine +BABEL_OP3_404_65640_20150528_211835_outLine +BABEL_OP3_404_66967_20141008_202611_inLine +BABEL_OP3_404_66967_20141008_202611_outLine +BABEL_OP3_404_67152_20150503_201836_inLine +BABEL_OP3_404_67152_20150503_201836_outLine +BABEL_OP3_404_67304_20150211_054416_inLine +BABEL_OP3_404_67304_20150211_054416_outLine +BABEL_OP3_404_67552_20141126_011955_inLine +BABEL_OP3_404_67552_20141126_011955_outLine +BABEL_OP3_404_68306_20141126_180315_inLine +BABEL_OP3_404_68306_20141126_180315_outLine +BABEL_OP3_404_69096_20150512_165126_inLine +BABEL_OP3_404_69096_20150512_165126_outLine +BABEL_OP3_404_69153_20141130_221412_inLine +BABEL_OP3_404_69153_20141130_221412_outLine +BABEL_OP3_404_69153_20141130_222842_inLine +BABEL_OP3_404_69153_20141130_222842_outLine +BABEL_OP3_404_69474_20141128_051323_outLine +BABEL_OP3_404_69633_20141129_051648_inLine +BABEL_OP3_404_69633_20141129_051648_outLine +BABEL_OP3_404_69636_20141126_061322_inLine +BABEL_OP3_404_69636_20141126_061322_outLine +BABEL_OP3_404_69885_20150503_011226_inLine +BABEL_OP3_404_69885_20150503_011226_outLine +BABEL_OP3_404_69937_20150620_015912_inLine +BABEL_OP3_404_69964_20150524_015556_inLine +BABEL_OP3_404_69964_20150524_015556_outLine +BABEL_OP3_404_69982_20150625_035440_outLine +BABEL_OP3_404_70221_20141124_052004_inLine +BABEL_OP3_404_70221_20141124_052004_outLine +BABEL_OP3_404_70460_20150527_015340_inLine +BABEL_OP3_404_70460_20150527_015340_outLine +BABEL_OP3_404_70526_20150501_015444_inLine +BABEL_OP3_404_70526_20150501_015444_outLine +BABEL_OP3_404_70713_20150527_013058_inLine +BABEL_OP3_404_70713_20150527_013058_outLine +BABEL_OP3_404_71189_20150523_005918_inLine +BABEL_OP3_404_71189_20150523_005918_outLine +BABEL_OP3_404_71278_20150211_052730_inLine +BABEL_OP3_404_71278_20150211_052730_outLine +BABEL_OP3_404_71278_20150211_054040_inLine +BABEL_OP3_404_71278_20150211_054040_outLine +BABEL_OP3_404_71333_20141102_023503_inLine +BABEL_OP3_404_71333_20141102_023503_outLine +BABEL_OP3_404_71460_20150206_015309_outLine +BABEL_OP3_404_71559_20141210_220929_outLine +BABEL_OP3_404_71780_20141105_055543_inLine +BABEL_OP3_404_71780_20141105_055543_outLine +BABEL_OP3_404_72319_20150502_041426_inLine +BABEL_OP3_404_72319_20150502_041426_outLine +BABEL_OP3_404_72733_20150515_044419_inLine 
+BABEL_OP3_404_72733_20150515_044419_outLine +BABEL_OP3_404_73072_20141012_012029_inLine +BABEL_OP3_404_73072_20141012_012029_outLine +BABEL_OP3_404_73258_20141117_010123_inLine +BABEL_OP3_404_73258_20141117_010123_outLine +BABEL_OP3_404_73964_20150512_205010_inLine +BABEL_OP3_404_73964_20150512_205010_outLine +BABEL_OP3_404_74728_20150503_042547_inLine +BABEL_OP3_404_74728_20150503_042547_outLine +BABEL_OP3_404_75465_20141129_223330_outLine +BABEL_OP3_404_75975_20150127_051140_outLine +BABEL_OP3_404_76126_20141201_202238_inLine +BABEL_OP3_404_76126_20141201_202238_outLine +BABEL_OP3_404_76238_20141129_223455_inLine +BABEL_OP3_404_76238_20141129_223455_outLine +BABEL_OP3_404_76372_20150601_014341_inLine +BABEL_OP3_404_76372_20150601_014341_outLine +BABEL_OP3_404_76444_20141127_032124_inLine +BABEL_OP3_404_76444_20141127_032124_outLine +BABEL_OP3_404_76482_20150618_063131_outLine +BABEL_OP3_404_76683_20141110_191551_inLine +BABEL_OP3_404_76683_20141110_191551_outLine +BABEL_OP3_404_76837_20150124_222250_outLine +BABEL_OP3_404_76970_20150625_191722_inLine +BABEL_OP3_404_77146_20141019_060916_inLine +BABEL_OP3_404_77242_20150612_024655_inLine +BABEL_OP3_404_77567_20141021_021210_inLine +BABEL_OP3_404_77567_20141021_021210_outLine +BABEL_OP3_404_77803_20141020_030844_inLine +BABEL_OP3_404_77803_20141020_030844_outLine +BABEL_OP3_404_78454_20141115_043455_inLine +BABEL_OP3_404_78749_20150620_025728_inLine +BABEL_OP3_404_78749_20150620_025728_outLine +BABEL_OP3_404_79190_20141108_232204_inLine +BABEL_OP3_404_79190_20141108_232204_outLine +BABEL_OP3_404_79590_20141129_025808_outLine +BABEL_OP3_404_79820_20141104_045340_inLine +BABEL_OP3_404_79820_20141104_045340_outLine +BABEL_OP3_404_79858_20141015_200446_inLine +BABEL_OP3_404_79898_20150620_022648_inLine +BABEL_OP3_404_79898_20150620_022648_outLine +BABEL_OP3_404_79898_20150620_024014_inLine +BABEL_OP3_404_79898_20150620_024014_outLine +BABEL_OP3_404_80069_20150614_233606_inLine +BABEL_OP3_404_80069_20150614_233606_outLine +BABEL_OP3_404_80306_20141119_003833_inLine +BABEL_OP3_404_80306_20141119_003833_outLine +BABEL_OP3_404_80306_20141119_005121_inLine +BABEL_OP3_404_80306_20141119_005121_outLine +BABEL_OP3_404_80559_20141022_010255_inLine +BABEL_OP3_404_80655_20150525_221544_inLine +BABEL_OP3_404_80655_20150525_221544_outLine +BABEL_OP3_404_80897_20141119_233718_inLine +BABEL_OP3_404_80897_20141119_233718_outLine +BABEL_OP3_404_81149_20150525_003741_inLine +BABEL_OP3_404_81149_20150525_003741_outLine +BABEL_OP3_404_81427_20141030_015136_inLine +BABEL_OP3_404_81427_20141030_015136_outLine +BABEL_OP3_404_81854_20150610_060437_inLine +BABEL_OP3_404_82626_20150615_014517_inLine +BABEL_OP3_404_82863_20141119_044230_inLine +BABEL_OP3_404_82863_20141119_044230_outLine +BABEL_OP3_404_83651_20141102_170912_inLine +BABEL_OP3_404_83651_20141102_170912_outLine +BABEL_OP3_404_83771_20150604_012300_outLine +BABEL_OP3_404_83974_20150617_022055_inLine +BABEL_OP3_404_84125_20141018_023340_inLine +BABEL_OP3_404_84125_20141018_023340_outLine +BABEL_OP3_404_84458_20141130_053628_outLine +BABEL_OP3_404_84815_20141127_011952_inLine +BABEL_OP3_404_84815_20141127_013345_inLine +BABEL_OP3_404_85047_20141117_014630_inLine +BABEL_OP3_404_85047_20141117_014630_outLine +BABEL_OP3_404_85340_20141103_022707_inLine +BABEL_OP3_404_85340_20141103_022707_outLine +BABEL_OP3_404_86597_20150612_170328_inLine +BABEL_OP3_404_86597_20150612_170328_outLine +BABEL_OP3_404_87074_20141105_190107_outLine +BABEL_OP3_404_87777_20141127_040747_inLine 
+BABEL_OP3_404_87777_20141127_040747_outLine +BABEL_OP3_404_87871_20141201_023608_inLine +BABEL_OP3_404_87871_20141201_023608_outLine +BABEL_OP3_404_87921_20141201_023029_inLine +BABEL_OP3_404_87921_20141201_023029_outLine +BABEL_OP3_404_88873_20141028_190127_inLine +BABEL_OP3_404_88873_20141028_190127_outLine +BABEL_OP3_404_89330_20150616_002908_inLine +BABEL_OP3_404_89330_20150616_002908_outLine +BABEL_OP3_404_89943_20141105_211847_outLine +BABEL_OP3_404_90347_20141119_012016_inLine +BABEL_OP3_404_90347_20141119_012016_outLine +BABEL_OP3_404_90760_20150611_151739_inLine +BABEL_OP3_404_90760_20150611_151739_outLine +BABEL_OP3_404_90832_20150616_012728_inLine +BABEL_OP3_404_90832_20150616_012728_outLine +BABEL_OP3_404_91383_20150618_035815_inLine +BABEL_OP3_404_91475_20150614_034536_inLine +BABEL_OP3_404_91581_20141129_045608_inLine +BABEL_OP3_404_91581_20141129_045608_outLine +BABEL_OP3_404_91581_20141129_050730_inLine +BABEL_OP3_404_91581_20141129_050730_outLine +BABEL_OP3_404_91593_20150611_021825_inLine +BABEL_OP3_404_91593_20150611_021825_outLine +BABEL_OP3_404_91884_20150503_022858_inLine +BABEL_OP3_404_91884_20150503_022858_outLine +BABEL_OP3_404_91888_20150512_191012_inLine +BABEL_OP3_404_91888_20150512_191012_outLine +BABEL_OP3_404_91891_20141129_005825_inLine +BABEL_OP3_404_91891_20141129_005825_outLine +BABEL_OP3_404_91944_20141022_021002_inLine +BABEL_OP3_404_91977_20141122_230420_outLine +BABEL_OP3_404_92176_20141119_195614_inLine +BABEL_OP3_404_92176_20141119_195614_outLine +BABEL_OP3_404_92281_20150625_185123_inLine +BABEL_OP3_404_92757_20150525_200048_inLine +BABEL_OP3_404_92757_20150525_200048_outLine +BABEL_OP3_404_92792_20150503_182854_outLine +BABEL_OP3_404_92792_20150525_025523_outLine +BABEL_OP3_404_92942_20141120_022830_inLine +BABEL_OP3_404_92942_20141120_022830_outLine +BABEL_OP3_404_93007_20150615_051230_inLine +BABEL_OP3_404_93007_20150615_051230_outLine +BABEL_OP3_404_93858_20150611_043732_inLine +BABEL_OP3_404_94002_20141119_015307_inLine +BABEL_OP3_404_94002_20141119_015307_outLine +BABEL_OP3_404_94333_20141020_024439_outLine +BABEL_OP3_404_94487_20150518_005132_outLine +BABEL_OP3_404_95077_20141201_055702_outLine +BABEL_OP3_404_95269_20141105_221810_inLine +BABEL_OP3_404_95269_20141105_221810_outLine +BABEL_OP3_404_95338_20150610_211203_inLine +BABEL_OP3_404_95338_20150610_211203_outLine +BABEL_OP3_404_95399_20141119_001023_inLine +BABEL_OP3_404_95399_20141119_001023_outLine +BABEL_OP3_404_95583_20141019_010741_inLine +BABEL_OP3_404_95583_20141019_010741_outLine +BABEL_OP3_404_96059_20150524_042224_outLine +BABEL_OP3_404_96205_20141119_033053_inLine +BABEL_OP3_404_96205_20141119_033053_outLine +BABEL_OP3_404_96205_20141119_034909_inLine +BABEL_OP3_404_96205_20141119_034909_outLine +BABEL_OP3_404_96247_20150526_202623_outLine +BABEL_OP3_404_96690_20141117_053054_inLine +BABEL_OP3_404_96690_20141117_053054_outLine +BABEL_OP3_404_96808_20150609_034129_inLine +BABEL_OP3_404_97097_20150601_042649_outLine +BABEL_OP3_404_97136_20150528_011250_inLine +BABEL_OP3_404_97136_20150528_011250_outLine +BABEL_OP3_404_97911_20150613_195820_outLine +BABEL_OP3_404_98165_20141030_214051_inLine +BABEL_OP3_404_98165_20141030_214051_outLine +BABEL_OP3_404_98192_20150617_021906_outLine +BABEL_OP3_404_98489_20141102_002030_inLine +BABEL_OP3_404_98489_20141102_004054_inLine +BABEL_OP3_404_98678_20150528_021605_inLine +BABEL_OP3_404_98678_20150528_023029_inLine +BABEL_OP3_404_99289_20150521_220314_inLine +BABEL_OP3_404_99289_20150521_220314_outLine 
+BABEL_OP3_404_99289_20150521_222144_inLine +BABEL_OP3_404_99289_20150521_222144_outLine +BABEL_OP3_404_99718_20141019_051850_inLine +BABEL_OP3_404_99718_20141019_051850_outLine +BABEL_OP3_404_99718_20141019_053305_inLine +BABEL_OP3_404_99718_20141019_053305_outLine +BABEL_OP3_404_99732_20141130_232553_inLine +BABEL_OP3_404_99732_20141130_232553_outLine +BABEL_OP3_404_99920_20141022_052026_inLine diff --git a/egs/babel/s5d/local/arpa2G.sh b/egs/babel/s5d/local/arpa2G.sh index 40c269fbb22..887b393b459 100755 --- a/egs/babel/s5d/local/arpa2G.sh +++ b/egs/babel/s5d/local/arpa2G.sh @@ -85,7 +85,8 @@ if [ ! -z "$oov_prob_file" ]; then print "$log10prob $word\n"; } }} print STDERR "Ceilinged $ceilinged unk-probs\n";' \ - $oov_prob_file $min_prob $unk_fraction | gzip -c > $destdir/lm_tmp.gz + $oov_prob_file $min_prob $unk_fraction | \ + ngram -unk -lm - -write-lm $destdir/lm_tmp.gz lmfile=$destdir/lm_tmp.gz fi diff --git a/egs/babel/s5d/local/chain/run_blstm.sh b/egs/babel/s5d/local/chain/run_blstm.sh index 6d13c55fc7d..f098604d04a 100755 --- a/egs/babel/s5d/local/chain/run_blstm.sh +++ b/egs/babel/s5d/local/chain/run_blstm.sh @@ -136,7 +136,7 @@ fi if [ $stage -le 18 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage fi [ ! -d $dir/egs ] && mkdir -p $dir/egs/ touch $dir/egs/.nodelete # keep egs around when that run dies. diff --git a/egs/babel/s5d/local/chain/run_blstm_bab1.sh b/egs/babel/s5d/local/chain/run_blstm_bab1.sh index ba8da0e14bc..95c7e9f28aa 100755 --- a/egs/babel/s5d/local/chain/run_blstm_bab1.sh +++ b/egs/babel/s5d/local/chain/run_blstm_bab1.sh @@ -136,7 +136,7 @@ fi if [ $stage -le 18 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage fi [ ! -d $dir/egs ] && mkdir -p $dir/egs/ touch $dir/egs/.nodelete # keep egs around when that run dies. diff --git a/egs/babel/s5d/local/chain/run_blstm_bab2.sh b/egs/babel/s5d/local/chain/run_blstm_bab2.sh index f5d698e262c..a6dd4cb9566 100755 --- a/egs/babel/s5d/local/chain/run_blstm_bab2.sh +++ b/egs/babel/s5d/local/chain/run_blstm_bab2.sh @@ -136,7 +136,7 @@ fi if [ $stage -le 18 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage fi [ ! -d $dir/egs ] && mkdir -p $dir/egs/ touch $dir/egs/.nodelete # keep egs around when that run dies. diff --git a/egs/babel/s5d/local/chain/run_blstm_bab3.sh b/egs/babel/s5d/local/chain/run_blstm_bab3.sh index 7ad51204c6f..52f085f8942 100755 --- a/egs/babel/s5d/local/chain/run_blstm_bab3.sh +++ b/egs/babel/s5d/local/chain/run_blstm_bab3.sh @@ -136,7 +136,7 @@ fi if [ $stage -le 18 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage fi [ ! -d $dir/egs ] && mkdir -p $dir/egs/ touch $dir/egs/.nodelete # keep egs around when that run dies. diff --git a/egs/babel/s5d/local/chain/run_blstm_bab4.sh b/egs/babel/s5d/local/chain/run_blstm_bab4.sh index 72aaeb8778f..47704e80ae4 100755 --- a/egs/babel/s5d/local/chain/run_blstm_bab4.sh +++ b/egs/babel/s5d/local/chain/run_blstm_bab4.sh @@ -135,7 +135,7 @@ fi if [ $stage -le 18 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage fi [ ! -d $dir/egs ] && mkdir -p $dir/egs/ touch $dir/egs/.nodelete # keep egs around when that run dies. diff --git a/egs/babel/s5d/local/chain/run_blstm_bab5.sh b/egs/babel/s5d/local/chain/run_blstm_bab5.sh index 1bae225022e..73c6a4089ed 100755 --- a/egs/babel/s5d/local/chain/run_blstm_bab5.sh +++ b/egs/babel/s5d/local/chain/run_blstm_bab5.sh @@ -135,7 +135,7 @@ fi if [ $stage -le 18 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage fi [ ! -d $dir/egs ] && mkdir -p $dir/egs/ touch $dir/egs/.nodelete # keep egs around when that run dies. diff --git a/egs/babel/s5d/local/chain/run_blstm_xconfig.sh b/egs/babel/s5d/local/chain/run_blstm_xconfig.sh new file mode 100755 index 00000000000..27e1a571ad0 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_blstm_xconfig.sh @@ -0,0 +1,206 @@ +#!/bin/bash + + +# by default, with cleanup: +# local/chain/run_blstm.sh +# %WER 46.8 | 19252 60586 | 57.6 28.5 13.8 4.5 46.8 31.7 | -0.643 | exp/chain_cleaned/blstm_sp_bi/decode_dev10h.pem/score_8/penalty_0.25/dev10h.pem.ctm.sys + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=17 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=_xconfig #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). 
+ # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + $langdir $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + #echo "$0: creating neural net configs"; + #steps/nnet3/lstm/make_configs.py \ + # --self-repair-scale-nonlinearity 0.00001 \ + # --self-repair-scale-clipgradient 1.0 \ + # $dir/configs || exit 1; + echo "$0: creating neural net configs using the xconfig parser"; + + label_delay=0 + xent_regularize=0.1 + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + lstmp-layer name=blstm1-forward input=lda cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + lstmp-layer name=blstm1-backward input=lda cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=3 + lstmp-layer name=blstm2-forward input=Append(blstm1-forward, blstm1-backward) cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + lstmp-layer name=blstm2-backward input=Append(blstm1-forward, blstm1-backward) cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=3 + lstmp-layer name=blstm3-forward input=Append(blstm2-forward, blstm2-backward) cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + lstmp-layer name=blstm3-backward input=Append(blstm2-forward, blstm2-backward) cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=3 + ## adding the layers for chain branch + output-layer name=output input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
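+  # (for example: with xent_regularize=0.1, as set earlier in this stage, the xent output layer below gets a learning-rate factor of 0.5 / 0.1 = 5.0)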
+ output-layer name=output-xent input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph +fi + +exit 0 diff --git a/egs/babel/s5d/local/chain/run_ivector_common.sh b/egs/babel/s5d/local/chain/run_ivector_common.sh index 7354d59465b..696fd14b45f 100755 --- a/egs/babel/s5d/local/chain/run_ivector_common.sh +++ b/egs/babel/s5d/local/chain/run_ivector_common.sh @@ -71,7 +71,8 @@ if [ $stage -le 2 ]; then utils/copy_data_dir.sh data/${train_set}_sp data/${train_set}_sp_hires mfccdir=data/${train_set}_sp_hires/data if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then - utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$mfccdir/storage $mfccdir/storage fi # do volume-perturbation on the training data prior to extracting hires @@ -171,7 +172,8 @@ if [ $stage -le 7 ]; then # valid for the non-'max2' data, the utterance list is the same. ivectordir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires_comb if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $ivectordir/storage ]; then - utils/create_split_dir.pl /export/b{15,16,17,18}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5/$ivectordir/storage $ivectordir/storage + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$ivectordir/storage $ivectordir/storage fi # We extract iVectors on the speed-perturbed training data after combining # short segments, which will be what we train the system on. 
With diff --git a/egs/babel/s5d/local/chain/run_tdnn.sh b/egs/babel/s5d/local/chain/run_tdnn.sh index 3ce53fa9292..2d9b6db75b7 100755 --- a/egs/babel/s5d/local/chain/run_tdnn.sh +++ b/egs/babel/s5d/local/chain/run_tdnn.sh @@ -133,7 +133,7 @@ fi if [ $stage -le 18 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage fi [ ! -d $dir/egs ] && mkdir -p $dir/egs/ touch $dir/egs/.nodelete # keep egs around when that run dies. diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab1.sh b/egs/babel/s5d/local/chain/run_tdnn_bab1.sh index db82c0f358a..0fa4020977c 100755 --- a/egs/babel/s5d/local/chain/run_tdnn_bab1.sh +++ b/egs/babel/s5d/local/chain/run_tdnn_bab1.sh @@ -133,7 +133,7 @@ fi if [ $stage -le 18 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage fi [ ! -d $dir/egs ] && mkdir -p $dir/egs/ touch $dir/egs/.nodelete # keep egs around when that run dies. diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab2.sh b/egs/babel/s5d/local/chain/run_tdnn_bab2.sh index 51387901683..ea9d5959c75 100755 --- a/egs/babel/s5d/local/chain/run_tdnn_bab2.sh +++ b/egs/babel/s5d/local/chain/run_tdnn_bab2.sh @@ -133,7 +133,7 @@ fi if [ $stage -le 18 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage fi [ ! -d $dir/egs ] && mkdir -p $dir/egs/ touch $dir/egs/.nodelete # keep egs around when that run dies. diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab3.sh b/egs/babel/s5d/local/chain/run_tdnn_bab3.sh index 098c3de0482..2973a2c9f02 100755 --- a/egs/babel/s5d/local/chain/run_tdnn_bab3.sh +++ b/egs/babel/s5d/local/chain/run_tdnn_bab3.sh @@ -3,7 +3,6 @@ # by default, with cleanup: # local/chain/run_tdnn.sh - # %WER 46.7 | 19252 60586 | 57.4 26.4 16.2 4.0 46.7 31.6 | -0.469 | exp/chain_cleaned/tdnnbab3_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys set -e -o pipefail @@ -134,7 +133,7 @@ fi if [ $stage -le 18 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage fi [ ! -d $dir/egs ] && mkdir -p $dir/egs/ touch $dir/egs/.nodelete # keep egs around when that run dies. diff --git a/egs/babel/s5d/local/chain/run_tdnn_bab4.sh b/egs/babel/s5d/local/chain/run_tdnn_bab4.sh index 5831cfc28f0..bd2eba9cb8b 100755 --- a/egs/babel/s5d/local/chain/run_tdnn_bab4.sh +++ b/egs/babel/s5d/local/chain/run_tdnn_bab4.sh @@ -133,7 +133,7 @@ fi if [ $stage -le 18 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage fi [ ! -d $dir/egs ] && mkdir -p $dir/egs/ touch $dir/egs/.nodelete # keep egs around when that run dies. diff --git a/egs/babel/s5d/local/chain/run_tdnn_lstm_1e.sh b/egs/babel/s5d/local/chain/run_tdnn_lstm_1e.sh new file mode 100755 index 00000000000..ec8366492d7 --- /dev/null +++ b/egs/babel/s5d/local/chain/run_tdnn_lstm_1e.sh @@ -0,0 +1,227 @@ +#!/bin/bash + +# From egs/swbdrun_tdnn_lstm_1e.sh + +set -e -o pipefail -u + +# configs for 'chain' +stage=0 +nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri5_cleaned # the gmm for the target data +langdir=data/langp/tri5_ali +num_threads_ubm=12 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +blstm_affix=bab1 #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1e # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_nj=50 + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir=exp/chain_cleaned/blstm_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
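+  # (the 7000 below is the number of leaves requested for the new tree)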
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $tree_dir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage + fi + [ ! -d $dir/egs ] && mkdir -p $dir/egs/ + touch $dir/egs/.nodelete # keep egs around when that run dies. 
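+  # The chunk-width and chunk-context options passed to train.py below come from the configuration section near the top of this script (frames_per_chunk=140,100,160, chunk_left_context=40, chunk_right_context=0).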
+ + # (you could also use --trainer.num-chunk-per-minibatch 128,64 here) + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 6 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + +exit 0 diff --git a/egs/babel/s5d/local/check_tools.sh b/egs/babel/s5d/local/check_tools.sh index ca8800def41..2c96f8445d1 100755 --- a/egs/babel/s5d/local/check_tools.sh +++ b/egs/babel/s5d/local/check_tools.sh @@ -18,20 +18,20 @@ [ -f ./path.sh ] && . ./path.sh sph2pipe=`command -v sph2pipe 2>/dev/null` \ - || { echo >&2 "sph2pipe not found on PATH. Did you run make in the $KALDI_ROOT/tools directory?"; return 1; } + || { echo >&2 "sph2pipe not found on PATH. Did you run make in the $KALDI_ROOT/tools directory?"; exit 1; } srilm=`command -v ngram 2>/dev/null` \ - || { echo >&2 "srilm not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_srilm.sh"; return 1; } + || { echo >&2 "srilm not found on PATH. Please use the script $KALDI_ROOT/tools/extras/install_srilm.sh"; exit 1; } sox=`command -v sox 2>/dev/null` \ - || { echo >&2 "sox not found on PATH. Please install it manually (you will need version 14.4.0 and higher)."; return 1; } + || { echo >&2 "sox not found on PATH. Please install it manually (you will need version 14.4.0 and higher)."; exit 1; } # If sox is found on path, check if the version is correct if [ ! -z "$sox" ]; then sox_version=`$sox --version 2>&1| head -1 | sed -e 's?.*: ??' -e 's?.* ??'` if [[ ! $sox_version =~ v14.4.* ]]; then echo "Unsupported sox version $sox_version found on path. You will need version v14.4.0 and higher." - return 1 + exit 1 fi fi diff --git a/egs/babel/s5d/local/extend_lexicon.sh b/egs/babel/s5d/local/extend_lexicon.sh index c930b1729e0..41b244f110b 100755 --- a/egs/babel/s5d/local/extend_lexicon.sh +++ b/egs/babel/s5d/local/extend_lexicon.sh @@ -148,20 +148,10 @@ cp $input_lexicon $toplevel_dir/input_lexicon.txt # just to have a record of wh loc=`which ngram-count`; if [ -z $loc ]; then - if uname -a | grep 64 >/dev/null; then # some kind of 64 bit...
- sdir=`pwd`/../../../tools/srilm/bin/i686-m64 - else - sdir=`pwd`/../../../tools/srilm/bin/i686 - fi - if [ -f $sdir/ngram-count ]; then - echo Using SRILM tools from $sdir - export PATH=$PATH:$sdir - else - echo You appear to not have SRILM tools installed, either on your path, - echo or installed in $sdir. See tools/install_srilm.sh for installation - echo instructions. - exit 1 - fi + echo You appear to not have SRILM tools installed, either on your path, + echo or installed in $sdir. See tools/install_srilm.sh for installation + echo instructions. + exit 1 fi @@ -231,10 +221,9 @@ if [ $stage -le -3 ]; then echo "$0: using SRILM to train syllable LM" - ngram-count -lm $dir/3gram.kn022.gz -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $dir/syllable_text.txt -sort - + ngram-count -lm $dir/3gram.me.gz -maxent -maxent-convert-to-arpa -kndiscount1 -gt1min 0 -kndiscount2 -gt2min 2 -kndiscount3 -gt3min 2 -order 3 -text $dir/syllable_text.txt -sort rm $dir/lm.gz 2>/dev/null - ln -s 3gram.kn022.gz $dir/lm.gz + ln -s 3gram.me.gz $dir/lm.gz fi diff --git a/egs/babel/s5d/local/generate_confusion_matrix.sh b/egs/babel/s5d/local/generate_confusion_matrix.sh index 48263e729de..fb602cf0957 100755 --- a/egs/babel/s5d/local/generate_confusion_matrix.sh +++ b/egs/babel/s5d/local/generate_confusion_matrix.sh @@ -61,7 +61,7 @@ fi mkdir -p $wdir/log cat $data/phones.txt | sed 's/_[B|E|I|S]//g' |\ - sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g' > $wdir/phones.txt + sed 's/_[%|"]//g' | sed 's/_[0-9]\+//g' | sed 's/_[^ ]*//g' > $wdir/phones.txt echo "Converting alignments to phone sequences..." $cmd JOB=1:$nj $wdir/log/ali_to_phones.JOB.log \ @@ -81,7 +81,8 @@ for i in `seq 1 $nj` ; do done echo "Converting statistics..." -cat $confusion_files | cut -f 2- -d ' ' | sed 's/ *; */\n/g'| sort | uniq -c | \ +cat $confusion_files | cut -f 2- -d ' ' | sed 's/ *; */\n/g' | \ + sed 's/ *$//g' | sed 's/^ *//g' | sort | uniq -c | \ grep -v -E '|||SIL' | \ perl -ane ' die unless scalar @F == 3; diff --git a/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py b/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py index b6d4b9ab944..3670ba755bc 100755 --- a/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py +++ b/egs/babel/s5d/local/lexicon/make_unicode_lexicon.py @@ -27,7 +27,7 @@ def main(): unicode_transcription = baseform2unicode(baseforms) encoded_transcription, table = encode(unicode_transcription, args.tag_percentage, - log=args.verbose) + log=args.log) write_table(table, args.lex_out) # Extract dictionary of nonspeech pronunciations @@ -59,7 +59,7 @@ def parse_input(): Parse commandline input. ''' if len(sys.argv[1:]) == 0: - print("Usage: ./make_unicode_lexicon.py [opts] lex_in lex_out") + print("Usage: ./make_unicode_lexicon.py [opts] lex_in lex_out [log]") sys.exit(1) parser = argparse.ArgumentParser() @@ -67,7 +67,9 @@ def parse_input(): "paired with a baseform. 
1 word per line with the " "baseform separated by a tab") parser.add_argument("lex_out", help="Path of output output " - "graphemc lexicon") + "graphemic lexicon") + parser.add_argument("log", nargs='?', default=None, + help="Directory in which the logs will be stored"); parser.add_argument("-F", "--fmt", help="Format of input word list", action="store", default="word_list") parser.add_argument("-T", "--tag_percentage", help="Percentage of least" @@ -246,12 +248,11 @@ def encode(unicode_transcription, tag_percentage, log=False): graph_counts = graph_counts_dict # Print grapheme counts to histogram - if log: + if log is not None: graph_counts_sorted = sorted(graph_counts, reverse=True, key=graph_counts.get) - if not os.path.exists("lex_log"): - os.makedirs("lex_log") - with codecs.open("lex_log/grapheme_histogram.txt", "w", "utf-8") as fp: + logfile = "{}/grapheme_histogram.txt".format(log) + with codecs.open(logfile, "w", "utf-8") as fp: fp.write("Graphemes (Count Threshold = %.6f)\n" % count_thresh) for g in graph_counts_sorted: weight = ("-" * int(np.ceil(500.0 * graph_counts[g])) + diff --git a/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh b/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh index 2ffb73810e3..be6aa5c2b40 100755 --- a/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh +++ b/egs/babel/s5d/local/nist_eval/create_new_language_configs.LLP.sh @@ -4,15 +4,16 @@ # Begin configuration section. language="201-haitian" +corpus=/export/babel/data/ +indus=/export/babel/data/scoring/IndusDB # End configuration section . ./utils/parse_options.sh set -e -o pipefail set -o nounset # Treat unset variables as an error -corpus=/export/babel/data/$language +corpus=$corpus/$language lists=./conf/lists/$language/ -indus=/export/babel/data/scoring/IndusDB corpusdir=$(find $corpus -maxdepth 1 -name "*-build" -type d) || exit 1 [ -z "$corpusdir" ] && "Corpus directory for $language not found!" && exit 1 diff --git a/egs/babel/s5d/local/nnet3/run_blstm.sh b/egs/babel/s5d/local/nnet3/run_blstm.sh index 6833baa0d72..fcf7fb8947d 100755 --- a/egs/babel/s5d/local/nnet3/run_blstm.sh +++ b/egs/babel/s5d/local/nnet3/run_blstm.sh @@ -5,7 +5,7 @@ cell_dim=512 rp_dim=128 nrp_dim=128 affix=bidirectional -multicondition=true +multicondition=false common_egs_dir= num_epochs=8 diff --git a/egs/babel/s5d/local/nnet3/run_ivector_common.sh b/egs/babel/s5d/local/nnet3/run_ivector_common.sh index bfe66d13f76..7313230a7ee 100755 --- a/egs/babel/s5d/local/nnet3/run_ivector_common.sh +++ b/egs/babel/s5d/local/nnet3/run_ivector_common.sh @@ -60,8 +60,8 @@ fi if [ $stage -le 3 ]; then mfccdir=mfcc_hires if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then - date=$(date +'%m_%d_%H_%M') - utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/swbd-$date/s5b/$mfccdir/storage $mfccdir/storage + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$mfccdir/storage $mfccdir/storage fi # the 100k_nodup directory is copied seperately, as diff --git a/egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh b/egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh index 8d3973e65bc..c3a6e1c0952 100755 --- a/egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh +++ b/egs/babel/s5d/local/nnet3/run_ivector_multicondition_common.sh @@ -70,8 +70,8 @@ fi if [ $stage -le 3 ]; then mfccdir=mfcc_hires if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $mfccdir/storage ]; then - date=$(date +'%m_%d_%H_%M') - utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/swbd-$date/s5b/$mfccdir/storage $mfccdir/storage + utils/create_split_dir.pl \ + /export/b0{1,2,3,4}/$USER/kaldi-data/egs/kaldi-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$mfccdir/storage $mfccdir/storage fi # the 100k_nodup directory is copied seperately, as @@ -151,8 +151,8 @@ train_set=train_sp_mc if [ $stage -le 7 ]; then mfccdir=mfcc_reverb if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then - date=$(date +'%m_%d_%H_%M') - utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/babel_reverb-$date/s5/$mfccdir/storage $mfccdir/storage + utils/create_split_dir.pl \ + /export/b0{1,2,3,4}/$USER/kaldi-data/egs/babel_reverb-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$mfccdir/storage $mfccdir/storage fi for data_dir in $train_set; do utils/copy_data_dir.sh data/$data_dir data/${data_dir}_hires diff --git a/egs/babel/s5d/local/nnet3/run_lstm.sh b/egs/babel/s5d/local/nnet3/run_lstm.sh index 8105cfda387..f7d06501569 100755 --- a/egs/babel/s5d/local/nnet3/run_lstm.sh +++ b/egs/babel/s5d/local/nnet3/run_lstm.sh @@ -121,7 +121,7 @@ fi if [ $stage -le 13 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage fi steps/nnet3/train_rnn.py --stage=$train_stage \ @@ -136,7 +136,6 @@ if [ $stage -le 13 ]; then --trainer.optimization.final-effective-lrate=$final_effective_lrate \ --trainer.optimization.shrink-value 0.99 \ --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \ - --trainer.optimization.cv-minibatch-size 128 \ --trainer.optimization.momentum=$momentum \ --egs.chunk-width=$chunk_width \ --egs.chunk-left-context=$chunk_left_context \ diff --git a/egs/babel/s5d/local/nnet3/run_lstm_realigned.sh b/egs/babel/s5d/local/nnet3/run_lstm_realigned.sh index acd65e9114e..2448b1b17ff 100755 --- a/egs/babel/s5d/local/nnet3/run_lstm_realigned.sh +++ b/egs/babel/s5d/local/nnet3/run_lstm_realigned.sh @@ -114,7 +114,7 @@ fi if [ $stage -le 3 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage fi steps/nnet3/train_rnn.py --stage=$train_stage \ diff --git a/egs/babel/s5d/local/nnet3/run_tdnn.sh b/egs/babel/s5d/local/nnet3/run_tdnn.sh index 8899e363dd9..2a663486bcb 100755 --- a/egs/babel/s5d/local/nnet3/run_tdnn.sh +++ b/egs/babel/s5d/local/nnet3/run_tdnn.sh @@ -60,7 +60,7 @@ local/nnet3/run_ivector_common.sh --stage $stage \ if [ $stage -le 9 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then utils/create_split_dir.pl \ - /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage fi steps/nnet3/train_tdnn.sh --stage $train_stage \ diff --git a/egs/babel/s5d/local/reestimate_langp.sh b/egs/babel/s5d/local/reestimate_langp.sh index 059fba52043..ae70b6a8f46 100755 --- a/egs/babel/s5d/local/reestimate_langp.sh +++ b/egs/babel/s5d/local/reestimate_langp.sh @@ -29,5 +29,6 @@ utils/dict_dir_add_pronprobs.sh --max-normalize true $idict \ $amdir/pron_bigram_counts_nowb.txt $odict utils/prepare_lang.sh --phone-symbol-table $langdir/phones.txt \ + --share-silence-phones true \ $odict "$unk" $olocallang $olang diff --git a/egs/babel/s5d/local/run_kws_stt_task2.sh b/egs/babel/s5d/local/run_kws_stt_task2.sh index 6007baa1756..9c10bfe6da5 100755 --- a/egs/babel/s5d/local/run_kws_stt_task2.sh +++ b/egs/babel/s5d/local/run_kws_stt_task2.sh @@ -71,14 +71,26 @@ fi if ! $skip_kws ; then [ ! -f $data_dir/extra_kws_tasks ] && exit 0 - syll_data_dir=$(echo $data_dir | perl -pe 's/\.(pem|seg)$/.syll.$1/g' ) + idata=$(basename $data_dir) + idir=$(dirname $data_dir) + + idataset=${idata%%.*} + idatatype=${idata#*.} + + if [ "$idata" == "$idataset" ]; then + syll_data_dir=$idir/${idataset}.syll + phn_data_dir=$idir/${idataset}.phn + else + syll_data_dir=$idir/${idataset}.syll.${idatatype} + phn_data_dir=$idir/${idataset}.phn.${idatatype} + fi + if [ -d ${syll_data_dir} ] && [ ! -f ${decode_dir}/syllabs/.done ] ; then local/syllab/lattice_word2syll.sh --cmd "$cmd --mem 8G" \ $data_dir $lang_dir ${lang_dir}.syll $decode_dir ${decode_dir}/syllabs touch ${decode_dir}/syllabs/.done fi - phn_data_dir=$(echo $data_dir | perl -pe 's/\.(pem|seg)$/.phn.$1/g' ) if [ -d ${phn_data_dir} ] && [ ! -f ${decode_dir}/phones/.done ] ; then local/syllab/lattice_word2syll.sh --cmd "$cmd --mem 8G" \ $data_dir $lang_dir ${lang_dir}.phn $decode_dir ${decode_dir}/phones diff --git a/egs/babel/s5d/local/search/run_phn_search.sh b/egs/babel/s5d/local/search/run_phn_search.sh index 44587699a38..e4dba529b3d 100755 --- a/egs/babel/s5d/local/search/run_phn_search.sh +++ b/egs/babel/s5d/local/search/run_phn_search.sh @@ -29,7 +29,11 @@ dataset=${dir%%.*} datatype=${dir#*.} lang=data/lang.phn -data=data/${dataset}.phn.${datatype} +if [ "$dir" == "$dataset" ]; then + data=data/${dataset}.phn +else + data=data/${dataset}.phn.${datatype} +fi set +o nounset eval kwsets=${!dataset_kwlists[@]} @@ -76,7 +80,7 @@ if [ $stage -le 2 ] ; then ${data}/kwset_${set}/tmp.4 # and finally, replace the categories by the word-level categories - cp data/$dir/kwset_${set}/categories $data/kwset_${set}/categories + cp data/${dir}/kwset_${set}/categories $data/kwset_${set}/categories done fi diff --git a/egs/babel/s5d/local/search/run_search.sh b/egs/babel/s5d/local/search/run_search.sh index 2cb40cabb59..1fbdb071123 100755 --- a/egs/babel/s5d/local/search/run_search.sh +++ b/egs/babel/s5d/local/search/run_search.sh @@ -67,8 +67,11 @@ if [ $stage -le 2 ] ; then #-- data/dev10h.pem/${set}_oov_kws/tmp/L1.lex data/dev10h.pem/kwset_${set}/tmp.3 if [ -d data/local/extend ]; then echo "Detected extended lexicon system..." 
- local/search/compile_proxy_keywords.sh --cmd "$decode_cmd --mem 12G" --filter "OOV=1&&Characters>2"\ - --beam 5 --nbest 50 --nj 64 --phone-beam 5 --phone-nbest 300 --confusion-matrix exp/conf_matrix/confusions.txt \ + local/search/compile_proxy_keywords.sh --filter "OOV=1&&Characters>2"\ + --cmd "$decode_cmd --mem 24G --max-jobs-run 64" --nj 128 \ + --beam $extlex_proxy_beam --nbest $extlex_proxy_nbest \ + --phone-beam $extlex_proxy_phone_beam --phone-nbest $extlex_proxy_phone_nbest\ + --confusion-matrix exp/conf_matrix/confusions.txt \ data/$dir/kwset_${set} data/lang data/local/lexiconp.txt exp/g2p \ data/$dir/kwset_${set}/tmp.4 else diff --git a/egs/babel/s5d/local/search/run_syll_search.sh b/egs/babel/s5d/local/search/run_syll_search.sh index eb48d836e77..41a925ce13a 100755 --- a/egs/babel/s5d/local/search/run_syll_search.sh +++ b/egs/babel/s5d/local/search/run_syll_search.sh @@ -29,7 +29,11 @@ dataset=${dir%%.*} datatype=${dir#*.} lang=data/lang.syll -data=data/${dataset}.syll.${datatype} +if [ "$dir" == "$dataset" ]; then + data=data/${dataset}.syll +else + data=data/${dataset}.syll.${datatype} +fi set +o nounset eval kwsets=${!dataset_kwlists[@]} diff --git a/egs/babel/s5d/local/search/search.sh b/egs/babel/s5d/local/search/search.sh index 200a49d8e86..6a5b2d35a97 100755 --- a/egs/babel/s5d/local/search/search.sh +++ b/egs/babel/s5d/local/search/search.sh @@ -26,6 +26,7 @@ silence_word= # specify this if you did to in kws_setup.sh, it's more accurate. strict=false duptime=0.6 ntrue_scale=1.0 +frame_subsampling_factor=1 nbest=-1 max_silence_frames=50 # End configuration section. diff --git a/egs/babel/s5d/local/syllab/lattice_word2syll.sh b/egs/babel/s5d/local/syllab/lattice_word2syll.sh index b81bf9d18d4..63e9114875d 100755 --- a/egs/babel/s5d/local/syllab/lattice_word2syll.sh +++ b/egs/babel/s5d/local/syllab/lattice_word2syll.sh @@ -26,7 +26,7 @@ mkdir -p $output/log if [ -f $olang/lex.words2syllabs.fst ] ; then fstinvert $olang/lex.words2syllabs.fst | fstreverse | \ - fstminimize | fstreverse > $output/L.fst + fstminimize --allow_nondet | fstreverse > $output/L.fst $cmd JOB=1:$nj $output/log/convert.JOB.log \ lattice-push --push-strings ark:"gunzip -c $input/lat.JOB.gz|" ark:- \| \ diff --git a/egs/babel/s5d/local/syllab/run_phones.sh b/egs/babel/s5d/local/syllab/run_phones.sh index 6f3c7be4cef..7c4a13c61f9 100755 --- a/egs/babel/s5d/local/syllab/run_phones.sh +++ b/egs/babel/s5d/local/syllab/run_phones.sh @@ -21,10 +21,20 @@ if [ $# -ne 1 ] ; then fi idir=$1 + +if [ ! -d "$idir" ] ; then + echo "The directory $idir does not exist" + exit 1 +fi + idata=${idir##*/} -odata=${idata%%.*}.phn.${idata#*.} +if [ "$idata" == ${idata%%.*} ]; then + odata=${idata%%.*}.phn +else + odata=${idata%%.*}.phn.${idata#*.} +fi if [ $stage -le -1 ] ; then local/syllab/generate_phone_lang.sh \ diff --git a/egs/babel/s5d/local/syllab/run_syllabs.sh b/egs/babel/s5d/local/syllab/run_syllabs.sh index a2ec82f3033..7366ac9ad35 100755 --- a/egs/babel/s5d/local/syllab/run_syllabs.sh +++ b/egs/babel/s5d/local/syllab/run_syllabs.sh @@ -21,10 +21,19 @@ if [ $# -ne 1 ] ; then fi idir=$1 -idata=${idir##*/} +if [ ! 
-d "$idir" ] ; then + echo "The directory $idir does not exist" + exit 1 +fi + +idata=${idir##*/} -odata=${idata%%.*}.syll.${idata#*.} +if [ "$idata" == ${idata%%.*} ]; then + odata=${idata%%.*}.syll +else + odata=${idata%%.*}.syll.${idata#*.} +fi if [ $stage -le -1 ] ; then local/syllab/generate_syllable_lang.sh \ @@ -45,7 +54,7 @@ if [ $stage -le -1 ] ; then local/arpa2G.sh data/srilm.syll/lm.gz data/lang.syll/ data/lang.syll/ fi -if [ $stage -le 0 ] && [ -f "$idir/text" ] ; then +if [ $stage -le 0 ] && [ -f "$idir/text" ]; then #Create dev10h.syll.pem dir steps/align_fmllr.sh \ --boost-silence $boost_sil --nj $train_nj --cmd "$train_cmd" \ diff --git a/egs/babel/s5d/run-1-main-unicode-extend-lex.sh b/egs/babel/s5d/run-1-main-unicode-extend-lex.sh new file mode 100755 index 00000000000..f9de3e8e947 --- /dev/null +++ b/egs/babel/s5d/run-1-main-unicode-extend-lex.sh @@ -0,0 +1,209 @@ +#!/bin/bash + +# Parameters for extended lexicon. +extend_lexicon=true +unk_fraction_boost=1.0 +num_sent_gen=12000000 +num_prons=1000000 +morfessor=true +tag_percentage=0.1 +denlats_only=false + +[ ! -f ./lang.conf ] && echo 'Language configuration does not exist! Use the configurations in conf/lang/* as a startup' && exit 1 +[ ! -f ./conf/common_vars.sh ] && echo 'the file conf/common_vars.sh does not exist!' && exit 1 + +. conf/common_vars.sh || exit 1; +. ./lang.conf || exit 1; + +[ -f local.conf ] && . ./local.conf + +. ./utils/parse_options.sh + +set -e #Exit on non-zero return code from any command +set -o pipefail #Exit if any of the commands in the pipeline will + #return non-zero return code +#set -u #Fail on an undefined variable + +lexicon=data/local/lexicon.txt +if $extend_lexicon; then + lexicon=data/local/lexiconp.txt +fi + +./local/check_tools.sh || exit 1 + +#Preparing dev2h and train directories +if [ ! -f data/raw_train_data/.done ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the TRAIN set" + echo --------------------------------------------------------------------- + + local/make_corpus_subset.sh "$train_data_dir" "$train_data_list" ./data/raw_train_data + train_data_dir=`readlink -f ./data/raw_train_data` + touch data/raw_train_data/.done +fi +nj_max=`cat $train_data_list | wc -l` +if [[ "$nj_max" -lt "$train_nj" ]] ; then + echo "The maximum reasonable number of jobs is $nj_max (you have $train_nj)! (The training and decoding process has file-granularity)" + exit 1; + train_nj=$nj_max +fi +train_data_dir=`readlink -f ./data/raw_train_data` + +if [ ! -d data/raw_dev2h_data ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the DEV2H set" + echo --------------------------------------------------------------------- + local/make_corpus_subset.sh "$dev2h_data_dir" "$dev2h_data_list" ./data/raw_dev2h_data || exit 1 +fi + +if [ ! -d data/raw_dev10h_data ]; then + echo --------------------------------------------------------------------- + echo "Subsetting the DEV10H set" + echo --------------------------------------------------------------------- + local/make_corpus_subset.sh "$dev10h_data_dir" "$dev10h_data_list" ./data/raw_dev10h_data || exit 1 +fi + +# Move data/dev2h preparation forward so we can get data/dev2h/text for +# diagnostic purpose when extending the lexicon. +if [[ ! 
-f data/dev2h/wav.scp || data/dev2h/wav.scp -ot ./data/raw_dev2h_data/audio ]]; then + echo --------------------------------------------------------------------- + echo "Preparing dev2h data lists in data/dev2h on" `date` + echo --------------------------------------------------------------------- + mkdir -p data/dev2h + local/prepare_acoustic_training_data.pl \ + --fragmentMarkers \-\*\~ \ + `pwd`/data/raw_dev2h_data data/dev2h > data/dev2h/skipped_utts.log || exit 1 +fi + +if [[ ! -f data/dev2h/glm || data/dev2h/glm -ot "$glmFile" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing dev2h stm files in data/dev2h on" `date` + echo --------------------------------------------------------------------- + if [ -z $dev2h_stm_file ]; then + echo "WARNING: You should define the variable stm_file pointing to the IndusDB stm" + echo "WARNING: Doing that, it will give you scoring close to the NIST scoring. " + local/prepare_stm.pl --fragmentMarkers \-\*\~ data/dev2h || exit 1 + else + local/augment_original_stm.pl $dev2h_stm_file data/dev2h || exit 1 + fi + [ ! -z $glmFile ] && cp $glmFile data/dev2h/glm + +fi + +mkdir -p data/local +if [[ ! -f $lexicon || $lexicon -ot "$lexicon_file" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing lexicon in data/local on" `date` + echo --------------------------------------------------------------------- + + local/lexicon/make_word_list.py $train_data_dir/filelist.list $train_data_dir/transcription data/local/word_list.txt + echo -e " SIL\n \n \n " > data/local/nonspeech.txt + echo -e " " > data/local/extraspeech.txt + + fmt="word_list" + if $morfessor; then + fmt="morfessor" + morfessor-train --encoding=utf_8 --traindata-list -f"-_" -s data/local/morfessor.bin \ + data/local/word_list.txt + morfessor-segment --encoding=utf_8 --output-format-separator '.' --viterbi-maxlen 3 \ + -l data/local/morfessor.bin <(cut -d' ' -f2 data/local/word_list.txt) \ + | sed 's/\.[\_\-]\././g' > data/local/segments + cut -d' ' data/local/word_list.txt -f2 | paste -d' ' - data/local/segments > data/local/word_list_tmp.txt + mv data/local/word_list_tmp.txt data/local/word_list.txt + fi + + local/lexicon/make_unicode_lexicon.py --tag_percentage $tag_percentage --fmt $fmt \ + --nonspeech data/local/nonspeech.txt --extraspeech data/local/extraspeech.txt \ + --verbose data/local/word_list.txt data/local/lexicon.txt data/local/ + local/prepare_unicode_lexicon.py --nonspeech data/local/nonspeech.txt \ + --extraspeech data/local/extraspeech.txt data/local/lexicon_table.txt data/local + cp data/local/lexicon.txt data/local/filtered_lexicon.txt + if $extend_lexicon; then + # Extend the original lexicon. + # Will creates the files data/local/extend/{lexiconp.txt,oov2prob}. + mv data/local/lexicon.txt data/local/lexicon_orig.txt + local/extend_lexicon.sh --cmd "$train_cmd" --cleanup false \ + --num-sent-gen $num_sent_gen --num-prons $num_prons \ + data/local/lexicon_orig.txt data/local/extend data/dev2h/text + cp data/local/extend/lexiconp.txt data/local/ + fi +fi + +mkdir -p data/lang +if [[ ! -f data/lang/L.fst || data/lang/L.fst -ot $lexicon ]]; then + echo --------------------------------------------------------------------- + echo "Creating L.fst etc in data/lang on" `date` + echo --------------------------------------------------------------------- + utils/prepare_lang.sh \ + --share-silence-phones true \ + data/local $oovSymbol data/local/tmp.lang data/lang +fi + +if [[ ! 
-f data/train/wav.scp || data/train/wav.scp -ot "$train_data_dir" ]]; then + echo --------------------------------------------------------------------- + echo "Preparing acoustic training lists in data/train on" `date` + echo --------------------------------------------------------------------- + mkdir -p data/train + local/prepare_acoustic_training_data.pl \ + --vocab $lexicon --fragmentMarkers \-\*\~ \ + $train_data_dir data/train > data/train/skipped_utts.log +fi + +if [[ ! -f data/srilm/lm.gz || data/srilm/lm.gz -ot data/train/text ]]; then + echo --------------------------------------------------------------------- + echo "Training SRILM language models on" `date` + echo --------------------------------------------------------------------- + # If extending the lexicon, use "--words-file data/local/lexicon_orig.txt" so + # that the LM is trained just on the vocab that appears in the text. Will add + # in the OOVs later. + words_file_param=() + if $extend_lexicon; then + words_file_param=(--words-file data/local/lexicon_orig.txt) + fi + local/train_lms_srilm.sh --oov-symbol "$oovSymbol"\ + "${words_file_param[@]}" \ + --train-text data/train/text data data/srilm +fi + +if [[ ! -f data/lang/G.fst || data/lang/G.fst -ot data/srilm/lm.gz ||\ + ( -f data/local/extend/oov2prob &&\ + data/lang/G.fst -ot data/local/extend/oov2prob ) ]]; then + echo --------------------------------------------------------------------- + echo "Creating G.fst on " `date` + echo --------------------------------------------------------------------- + extend_lexicon_param=() + if $extend_lexicon; then + [ -f data/local/extend/original_oov_rates ] || exit 1; + unk_fraction=`cat data/local/extend/original_oov_rates |\ + grep "token" | awk -v x=$unk_fraction_boost '{print $NF/100.0*x}'` + extend_lexicon_param=(--cleanup false --unk-fraction $unk_fraction \ + --oov-prob-file data/local/extend/oov2prob) + fi + local/arpa2G.sh ${extend_lexicon_param[@]} \ + data/srilm/lm.gz data/lang data/lang +fi + +echo --------------------------------------------------------------------- +echo "Starting plp feature extraction for data/train in plp on" `date` +echo --------------------------------------------------------------------- + +if [ ! -f data/train/.plp.done ]; then + if $use_pitch; then + steps/make_plp_pitch.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp_pitch/train plp + else + steps/make_plp.sh --cmd "$train_cmd" --nj $train_nj data/train exp/make_plp/train plp + fi + utils/fix_data_dir.sh data/train + steps/compute_cmvn_stats.sh data/train exp/make_plp/train plp + utils/fix_data_dir.sh data/train + touch data/train/.plp.done +fi + +touch data/.extlex +mkdir -p exp + +echo ------------------------------------------------------------------------- +echo "Extended lexicon finished on" `date`. Now running script run-1-main.sh +echo ------------------------------------------------------------------------- +./run-1-main-unicode.sh --denlats-only "$denlats_only" +exit 0 diff --git a/egs/babel/s5d/run-1-main-unicode.sh b/egs/babel/s5d/run-1-main-unicode.sh index e3fb2486c83..acd2693cbef 100755 --- a/egs/babel/s5d/run-1-main-unicode.sh +++ b/egs/babel/s5d/run-1-main-unicode.sh @@ -80,7 +80,7 @@ if [[ ! 
-f $lexicon || $lexicon -ot "$lexicon_file" ]]; then local/lexicon/make_unicode_lexicon.py --tag_percentage $tag_percentage --fmt $fmt \ --nonspeech data/local/nonspeech.txt --extraspeech data/local/extraspeech.txt \ - --verbose data/local/word_list.txt data/local/lexicon.txt + --verbose data/local/word_list.txt data/local/lexicon.txt data/local/ local/prepare_unicode_lexicon.py --nonspeech data/local/nonspeech.txt \ --extraspeech data/local/extraspeech.txt data/local/lexicon_table.txt data/local cp data/local/lexicon.txt data/local/filtered_lexicon.txt diff --git a/egs/babel/s5d/run-4-anydecode.sh b/egs/babel/s5d/run-4-anydecode.sh index 083ac7e9879..8ac0fde2621 100755 --- a/egs/babel/s5d/run-4-anydecode.sh +++ b/egs/babel/s5d/run-4-anydecode.sh @@ -26,7 +26,7 @@ extra_left_context=40 extra_right_context=40 frames_per_chunk=20 -echo "run-4-test.sh $@" +echo "$0 $@" . utils/parse_options.sh @@ -61,7 +61,9 @@ dataset_type=${dir%%.*} #By default, we want the script to accept how the dataset should be handled, #i.e. of what kind is the dataset if [ -z ${kind} ] ; then - if [ "$dataset_type" == "dev2h" ] || [ "$dataset_type" == "dev10h" ]; then + if [ "$dataset_type" == "dev2h" ] || \ + [ "$dataset_type" == "dev10h" ] || \ + [ "$dataset_type" == "train" ]; then dataset_kind=supervised else dataset_kind=unsupervised @@ -96,11 +98,24 @@ if [ -z $my_data_dir ] || [ -z $my_data_list ] ; then exit 1 fi +if [ "$dataset_type" == "train" ] ; then + local/ali_to_rttm.sh --cmd "$decode_cmd" data/train data/langp_test exp/tri5_ali + bash -x local/qbe/wav_to_ecf.sh data/train/wav.scp > data/train/ecf.train.xml + train_rttm_file=./exp/tri5_ali/rttm + train_ecf_file=./data/train/ecf.train.xml +fi + + eval my_stm_file=\$${dataset_type}_stm_file eval my_ecf_file=\$${dataset_type}_ecf_file eval my_rttm_file=\$${dataset_type}_rttm_file eval my_nj=\$${dataset_type}_nj #for shadow, this will be re-set when appropriate +echo "my_stm_file=$my_stm_file" +echo "my_ecf_file=$my_ecf_file" +echo "my_rttm_file=$my_rttm_file" +echo "my_nj=$my_nj" + if [ -z "$my_nj" ]; then echo >&2 "You didn't specify the number of jobs -- variable \"${dataset_type}_nj\" not defined." exit 1 @@ -214,7 +229,8 @@ if [ ! -f $dataset_dir/.done ] ; then . ./local/datasets/supervised_seg.sh || exit 1 elif [ "$dataset_segments" == "uem" ]; then . ./local/datasets/supervised_uem.sh || exit 1 - elif [ "$dataset_segments" == "pem" ]; then + elif [ "$dataset_segments" == "train" ] ||\ + [ "$dataset_segments" == "pem" ]; then . ./local/datasets/supervised_pem.sh || exit 1 else echo "Unknown type of the dataset: \"$dataset_segments\"!"; @@ -294,29 +310,31 @@ echo --------------------------------------------------------------------- echo "Preparing kws data files in ${dataset_dir} on" `date` echo --------------------------------------------------------------------- lang=data/lang -if ! $skip_kws ; then - if $extra_kws ; then - L1_lex=data/local/lexiconp.txt - . ./local/datasets/extra_kws.sh || exit 1 - fi - if $vocab_kws ; then - . ./local/datasets/vocab_kws.sh || exit 1 - fi - if [ ! -f data/lang.phn/G.fst ] ; then - ./local/syllab/run_phones.sh --stage -2 ${dataset_dir} - else - ./local/syllab/run_phones.sh ${dataset_dir} - fi +if [ ! -f data/dev10h.pem/.done.kws.dev ] ; then + if ! $skip_kws ; then + if $extra_kws ; then + L1_lex=data/local/lexiconp.txt + . ./local/datasets/extra_kws.sh || exit 1 + fi + if $vocab_kws ; then + . ./local/datasets/vocab_kws.sh || exit 1 + fi + if [ ! 
-f data/lang.phn/G.fst ] ; then + ./local/syllab/run_phones.sh --stage -2 ${dataset_dir} + else + ./local/syllab/run_phones.sh ${dataset_dir} + fi - if [ ! -f data/lang.syll/G.fst ] ; then - ./local/syllab/run_syllabs.sh --stage -2 ${dataset_dir} - else - ./local/syllab/run_syllabs.sh ${dataset_dir} - fi + if [ ! -f data/lang.syll/G.fst ] ; then + ./local/syllab/run_syllabs.sh --stage -2 ${dataset_dir} + else + ./local/syllab/run_syllabs.sh ${dataset_dir} + fi - ./local/search/run_search.sh --dir ${dataset_dir##*/} - ./local/search/run_phn_search.sh --dir ${dataset_dir##*/} - ./local/search/run_syll_search.sh --dir ${dataset_dir##*/} + ./local/search/run_search.sh --dir ${dataset_dir##*/} + ./local/search/run_phn_search.sh --dir ${dataset_dir##*/} + ./local/search/run_syll_search.sh --dir ${dataset_dir##*/} + fi fi if $data_only ; then @@ -379,72 +397,6 @@ if $tri5_only; then exit 0 fi -#################################################################### -## SGMM2 decoding -## We Include the SGMM_MMI inside this, as we might only have the DNN systems -## trained and not PLP system. The DNN systems build only on the top of tri5 stage -#################################################################### -if [ -f exp/sgmm5/.done ]; then - decode=exp/sgmm5/decode_fmllr_${dataset_id} - if [ ! -f $decode/.done ]; then - echo --------------------------------------------------------------------- - echo "Spawning $decode on" `date` - echo --------------------------------------------------------------------- - utils/mkgraph.sh \ - data/langp_test exp/sgmm5 exp/sgmm5/graph |tee exp/sgmm5/mkgraph.log - - mkdir -p $decode - steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \ - --cmd "$decode_cmd" --transform-dir exp/tri5/decode_${dataset_id} "${decode_extra_opts[@]}"\ - exp/sgmm5/graph ${dataset_dir} $decode |tee $decode/decode.log - touch $decode/.done - - if ! $fast_path ; then - local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ - --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ - --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ - "${lmwt_plp_extra_opts[@]}" \ - ${dataset_dir} data/langp_test exp/sgmm5/decode_fmllr_${dataset_id} - fi - fi - - #################################################################### - ## - ## SGMM_MMI rescoring - ## - #################################################################### - - for iter in 1 2 3 4; do - # Decode SGMM+MMI (via rescoring). - decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter - if [ -x exp/sgmm5_mmi_b0.1 ] && [ ! -f $decode/.done ]; then - - mkdir -p $decode - steps/decode_sgmm2_rescore.sh --skip-scoring true \ - --cmd "$decode_cmd" --iter $iter --transform-dir exp/tri5/decode_${dataset_id} \ - data/langp_test ${dataset_dir} exp/sgmm5/decode_fmllr_${dataset_id} $decode | tee ${decode}/decode.log - - touch $decode/.done - fi - done - - #We are done -- all lattices has been generated. We have to - #a)Run MBR decoding - #b)Run KW search - for iter in 1 2 3 4; do - # Decode SGMM+MMI (via rescoring). 
- decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter - if [ -f $decode/.done ]; then - local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ - --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ - --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ - "${lmwt_plp_extra_opts[@]}" \ - ${dataset_dir} data/langp_test $decode - fi - done -fi - - #################################################################### ## @@ -476,10 +428,13 @@ fi ## nnet3 model decoding ## #################################################################### -if [ -f exp/nnet3/lstm_bidirectional_sp/.done ]; then +if [ -f exp/nnet3/lstm_bidirectional_sp/final.mdl ]; then decode=exp/nnet3/lstm_bidirectional_sp/decode_${dataset_id} rnn_opts=" --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 " decode_script=steps/nnet3/decode.sh + my_nj_backup=$my_nj + echo "Modifying the number of jobs as this is an RNN and decoding can be extremely slow." + my_nj=`cat ${dataset_dir}_hires/spk2utt|wc -l` if [ ! -f $decode/.done ]; then mkdir -p $decode $decode_script --nj $my_nj --cmd "$decode_cmd" $rnn_opts \ @@ -496,9 +451,11 @@ if [ -f exp/nnet3/lstm_bidirectional_sp/.done ]; then --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ "${lmwt_dnn_extra_opts[@]}" \ ${dataset_dir} data/langp_test $decode + + my_nj=$my_nj_backup fi -if [ -f exp/nnet3/lstm_realigned_bidirectional_sp//.done ]; then +if [ -f exp/nnet3/lstm_realigned_bidirectional_sp/final.mdl ]; then decode=exp/nnet3/lstm_realigned_bidirectional_sp//decode_${dataset_id} rnn_opts=" --extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 " decode_script=steps/nnet3/decode.sh @@ -519,7 +476,7 @@ if [ -f exp/nnet3/lstm_realigned_bidirectional_sp//.done ]; then "${lmwt_dnn_extra_opts[@]}" \ ${dataset_dir} data/langp_test $decode fi -if [ -f exp/nnet3/lstm_sp/.done ]; then +if [ -f exp/nnet3/lstm_sp/final.mdl ]; then decode=exp/nnet3/lstm_sp/decode_${dataset_id} rnn_opts=" --extra-left-context 40 --extra-right-context 0 --frames-per-chunk 20 " decode_script=steps/nnet3/decode.sh @@ -541,7 +498,7 @@ if [ -f exp/nnet3/lstm_sp/.done ]; then ${dataset_dir} data/langp_test $decode fi -if [ -f exp/$nnet3_model/.done ]; then +if [ -f exp/$nnet3_model/final.mdl ]; then decode=exp/$nnet3_model/decode_${dataset_id} rnn_opts= decode_script=steps/nnet3/decode.sh @@ -583,6 +540,7 @@ if [ -f exp/$chain_model/final.mdl ]; then touch exp/nnet3$parent_dir_suffix/ivectors_${dataset_id}/.done fi + my_nj_backup=$my_nj rnn_opts= if [ "$is_rnn" == "true" ]; then rnn_opts=" --extra-left-context $extra_left_context --extra-right-context $extra_right_context --frames-per-chunk $frames_per_chunk " @@ -608,6 +566,7 @@ if [ -f exp/$chain_model/final.mdl ]; then --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ "${lmwt_chain_extra_opts[@]}" \ ${dataset_dir} data/langp_test $decode + my_nj=$my_nj_backup else echo "no chain model exp/$chain_model" fi @@ -720,5 +679,72 @@ for dnn in tri6_nnet_semi_supervised tri6_nnet_semi_supervised2 \ ${dataset_dir} data/langp_test $decode fi done + +#################################################################### +## SGMM2 decoding +## We Include the SGMM_MMI inside this, as we might only have the DNN systems +## trained and not PLP system. The DNN systems build only on the top of tri5 stage +#################################################################### +if [ -f exp/sgmm5/.done ]; then + decode=exp/sgmm5/decode_fmllr_${dataset_id} + if [ ! 
-f $decode/.done ]; then + echo --------------------------------------------------------------------- + echo "Spawning $decode on" `date` + echo --------------------------------------------------------------------- + utils/mkgraph.sh \ + data/langp_test exp/sgmm5 exp/sgmm5/graph |tee exp/sgmm5/mkgraph.log + + mkdir -p $decode + steps/decode_sgmm2.sh --skip-scoring true --use-fmllr true --nj $my_nj \ + --cmd "$decode_cmd" --transform-dir exp/tri5/decode_${dataset_id} "${decode_extra_opts[@]}"\ + exp/sgmm5/graph ${dataset_dir} $decode |tee $decode/decode.log + touch $decode/.done + + if ! $fast_path ; then + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_plp_extra_opts[@]}" \ + ${dataset_dir} data/langp_test exp/sgmm5/decode_fmllr_${dataset_id} + fi + fi + + #################################################################### + ## + ## SGMM_MMI rescoring + ## + #################################################################### + + for iter in 1 2 3 4; do + # Decode SGMM+MMI (via rescoring). + decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter + if [ -x exp/sgmm5_mmi_b0.1 ] && [ ! -f $decode/.done ]; then + + mkdir -p $decode + steps/decode_sgmm2_rescore.sh --skip-scoring true \ + --cmd "$decode_cmd" --iter $iter --transform-dir exp/tri5/decode_${dataset_id} \ + data/langp_test ${dataset_dir} exp/sgmm5/decode_fmllr_${dataset_id} $decode | tee ${decode}/decode.log + + touch $decode/.done + fi + done + + #We are done -- all lattices has been generated. We have to + #a)Run MBR decoding + #b)Run KW search + for iter in 1 2 3 4; do + # Decode SGMM+MMI (via rescoring). + decode=exp/sgmm5_mmi_b0.1/decode_fmllr_${dataset_id}_it$iter + if [ -f $decode/.done ]; then + local/run_kws_stt_task2.sh --cer $cer --max-states $max_states \ + --skip-scoring $skip_scoring --extra-kws $extra_kws --wip $wip \ + --cmd "$decode_cmd" --skip-kws $skip_kws --skip-stt $skip_stt \ + "${lmwt_plp_extra_opts[@]}" \ + ${dataset_dir} data/langp_test $decode + fi + done +fi + + echo "Everything looking good...." exit 0 diff --git a/egs/callhome_egyptian/s5/run.sh b/egs/callhome_egyptian/s5/run.sh index 9d1fa692da0..4d1359bea98 100755 --- a/egs/callhome_egyptian/s5/run.sh +++ b/egs/callhome_egyptian/s5/run.sh @@ -29,7 +29,7 @@ local/callhome_prepare_dict.sh $eca_lexicon utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang # Make sure that you do not use your test and your dev sets to train the LM -# Some form of cross validation is possible where you decode your dev/set based on an +# Some form of cross validation is possible where you decode your dev/set based on an # LM that is trained on everything but that that conversation local/callhome_train_lms.sh $split local/callhome_create_test_lang.sh @@ -100,7 +100,7 @@ steps/train_lda_mllt.sh --cmd "$train_cmd" \ exp/tri3a/graph data/dev exp/tri3a/decode_dev || exit 1; )& -# Next we'll use fMLLR and train with SAT (i.e. on +# Next we'll use fMLLR and train with SAT (i.e. 
on # fMLLR features) steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ @@ -108,7 +108,7 @@ steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ steps/train_sat.sh --cmd "$train_cmd" \ 2200 25000 data/train data/lang exp/tri3a_ali exp/tri4a || exit 1; - + ( utils/mkgraph.sh data/lang_test exp/tri4a exp/tri4a/graph steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ @@ -140,9 +140,9 @@ steps/train_sat.sh --cmd "$train_cmd" \ )& dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \ - --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G") + --parallel-opts "--num-threads 16" --cmd "queue.pl --mem 1G") dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1 \ - --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G") + --parallel-opts "--gpu 1" --cmd "queue.pl --mem 1G") steps/nnet2/train_pnorm_ensemble.sh \ --mix-up 5000 --initial-learning-rate 0.008 --final-learning-rate 0.0008\ @@ -153,17 +153,17 @@ steps/nnet2/train_pnorm_ensemble.sh \ data/train data/lang exp/tri5a_ali exp/tri6a_dnn ( - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev exp/tri5a/graph data/dev exp/tri6a_dnn/decode_dev ) & # Decode test sets ( - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_test exp/tri5a/graph data/test exp/tri6a_dnn/decode_test - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_sup exp/tri5a/graph data/sup exp/tri6a_dnn/decode_sup - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_h5 exp/tri5a/graph data/h5 exp/tri6a_dnn/decode_h5 ) & diff --git a/egs/chime3/s5/cmd.sh b/egs/chime3/s5/cmd.sh index 7ee5fbcd73d..cf2570db1a9 100755 --- a/egs/chime3/s5/cmd.sh +++ b/egs/chime3/s5/cmd.sh @@ -6,9 +6,9 @@ # the number of cpus on your machine. #a) JHU cluster options -#export train_cmd="queue.pl -l arch=*64" -#export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G" -#export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G" +#export train_cmd="queue.pl" +#export decode_cmd="queue.pl --mem 4G" +#export mkgraph_cmd="queue.pl --mem 4G" #export cuda_cmd="..." 
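# The cmd.sh and recipe changes throughout this patch replace raw SGE resource
# strings with queue.pl's portable options: "-pe smp N" becomes "--num-threads N",
# "-l gpu=1" becomes "--gpu 1", and "-l mem_free=XG,ram_free=XG" becomes "--mem XG".
# A minimal sketch of a new-style cmd.sh, with illustrative (not recipe-specific)
# memory and thread values:
#
#export train_cmd="queue.pl --mem 2G"
#export decode_cmd="queue.pl --mem 4G --num-threads 4"
#export cuda_cmd="queue.pl --gpu 1"
#
# queue.pl maps these generic options onto the local grid engine's own flags, so
# the same cmd.sh settings can be reused across different clusters.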
diff --git a/egs/csj/s5/local/csj_run_rnnlm.sh b/egs/csj/s5/local/csj_run_rnnlm.sh index 5c6cd4343f6..e02f19bb680 100755 --- a/egs/csj/s5/local/csj_run_rnnlm.sh +++ b/egs/csj/s5/local/csj_run_rnnlm.sh @@ -3,7 +3,7 @@ # Copyright 2016 Tokyo Institute of Technology (Authors: Tomohiro Tanaka, Takafumi Moriya and Takahiro Shinozaki) # 2016 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe) # Apache 2.0 -# Acknowledgement This work was supported by JSPS KAKENHI Grant Number 26280055. +# Acknowledgement This work was supported by JSPS KAKENHI Grant Number 26280055. [ -f ./path.sh ] && . ./path.sh . utils/parse_options.sh @@ -21,7 +21,7 @@ echo h30 Begin local/csj_train_rnnlms.sh --dict-suffix "_nosp" data/local/rnnlm.h30 sleep 20; # wait till tools compiled. -echo h100 Begin +echo h100 Begin local/csj_train_rnnlms.sh --dict-suffix "_nosp" \ --hidden 100 --nwords 10000 --class 200 \ --direct 0 data/local/rnnlm.h100 @@ -60,9 +60,9 @@ for dict in rnnlm.h30 rnnlm.h100 rnnlm.h200 rnnlm.h300 rnnlm.h400 rnnlm.h500 ;do echo "rnnlm0.5" steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver \ - --N 100 --cmd "queue -l mem_free=1G" --inv-acwt $acwt 0.5 \ + --N 100 --cmd "$decode_cmd --mem 1G" --inv-acwt $acwt 0.5 \ data/lang_csj_tg $dir data/$eval_num $sourcedir ${resultsdir}_L0.5 - + rm -rf ${resultsdir}_L0.25 rm -rf ${resultsdir}_L0.75 cp -rp ${resultsdir}_L0.5 ${resultsdir}_L0.25 @@ -70,12 +70,12 @@ for dict in rnnlm.h30 rnnlm.h100 rnnlm.h200 rnnlm.h300 rnnlm.h400 rnnlm.h500 ;do echo "rnnlm0.25" steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver \ - --stage 7 --N 100 --cmd "$decode_cmd -l mem_free=1G" --inv-acwt $acwt 0.25 \ + --stage 7 --N 100 --cmd "$decode_cmd --mem 1G" --inv-acwt $acwt 0.25 \ data/lang_csj_tg $dir data/$eval_num $sourcedir ${resultsdir}_L0.25 echo "rnnlm0.75" steps/rnnlmrescore.sh --rnnlm_ver $rnnlm_ver \ - --stage 7 --N 100 --cmd "$decode_cmd -l mem_free=1G" --inv-acwt $acwt 0.75 \ + --stage 7 --N 100 --cmd "$decode_cmd --mem 1G" --inv-acwt $acwt 0.75 \ data/lang_csj_tg $dir data/$eval_num $sourcedir ${resultsdir}_L0.75 done done diff --git a/egs/csj/s5/local/nnet/run_lstm.sh b/egs/csj/s5/local/nnet/run_lstm.sh index 3cc330c55a8..dc0f40dec24 100755 --- a/egs/csj/s5/local/nnet/run_lstm.sh +++ b/egs/csj/s5/local/nnet/run_lstm.sh @@ -34,10 +34,10 @@ stage=0 steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj 10 $dir $dir/log $dir/data || exit 1; steps/compute_cmvn_stats.sh $dir $dir/log $dir/data || exit 1; done - + # Training set utils/copy_data_dir.sh $train_original $train || exit 1; rm $train/{cmvn,feats}.scp - steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd -tc 10" \ + steps/make_fbank_pitch.sh --nj 10 --cmd "$train_cmd --max-jobs-run 10" \ $train $train/log $train/data || exit 1; steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1; # Split the training set diff --git a/egs/fame/README.txt b/egs/fame/README.txt new file mode 100644 index 00000000000..d2ed39eef75 --- /dev/null +++ b/egs/fame/README.txt @@ -0,0 +1,15 @@ +The FAME! Speech Corpus + +The components of the Frisian data collection are speech and language resources gathered for building a large vocabulary ASR system for the Frisian language. Firstly, a new broadcast database is created by collecting recordings from the archives of the regional broadcaster Omrop Fryslân, and annotating them with various information such as the language switches and speaker details. The second component of this collection is a language model created on a text corpus with diverse vocabulary. 
Thirdly, a Frisian phonetic dictionary with the mappings between the Frisian words and phones is built to make ASR viable for this under-resourced language. Finally, an ASR recipe is provided which uses all of the previous resources to perform recognition and report the recognition performance. + +The corpus consists of short utterances extracted from 203 audio segments, each approximately 5 minutes long, which are parts of various radio programs covering a time span of almost 50 years (1966-2015), adding a longitudinal dimension to the database. The content of the recordings is very diverse, including radio programs about culture, history, literature, sports, nature, agriculture, politics, society and languages. The total duration of the manually annotated radio broadcasts adds up to 18 hours, 33 minutes and 57 seconds. The stereo audio data has a sampling frequency of 48 kHz and 16-bit resolution per sample. The available meta-information helped the annotators to identify the speakers and mark them either with their names or with a consistent label (if the name is not known). There are 309 identified speakers in the FAME! Speech Corpus, 21 of whom appear at least 3 times in the database. These speakers are mostly program presenters and celebrities appearing multiple times in different recordings over the years. There are 233 unidentified speakers due to a lack of meta-information. The total number of word- and sentence-level code-switching cases in the FAME! Speech Corpus is 3837. Music portions have been removed, except where they overlap with speech. + +A full description of the FAME! Speech Corpus is provided in: + +Yilmaz, E., Heuvel, H. van den, Van de Velde, H., Kampstra, F., Algra, J., Leeuwen, D. van: + +Open Source Speech and Language Resources for Frisian Language. + +In: Proceedings Interspeech 2016, pp. 1536--1540, 8-12 September 2016, San Francisco + +Please check http://www.ru.nl/clst/datasets/ to get the FAME!
Speech Corpus diff --git a/egs/fame/s5/RESULTS b/egs/fame/s5/RESULTS new file mode 100644 index 00000000000..a8541fba6b5 --- /dev/null +++ b/egs/fame/s5/RESULTS @@ -0,0 +1,28 @@ +%WER 41.10 [ 4974 / 12101, 522 ins, 1223 del, 3229 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_devel/wer_11_0.0 +%WER 38.10 [ 4909 / 12886, 527 ins, 1220 del, 3162 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_test/wer_11_0.0 +%WER 41.06 [ 4969 / 12101, 514 ins, 1277 del, 3178 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it1/wer_11_0.0 +%WER 40.38 [ 4886 / 12101, 515 ins, 1225 del, 3146 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it3/wer_11_0.0 +%WER 40.15 [ 4859 / 12101, 514 ins, 1177 del, 3168 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it6/wer_10_0.5 +%WER 37.86 [ 4879 / 12886, 596 ins, 1083 del, 3200 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it1/wer_10_0.0 +%WER 37.16 [ 4789 / 12886, 592 ins, 1056 del, 3141 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it3/wer_10_0.0 +%WER 36.92 [ 4757 / 12886, 618 ins, 1010 del, 3129 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it6/wer_10_0.0 +%WER 42.38 [ 5129 / 12101, 576 ins, 1171 del, 3382 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode_devel/wer_11_0.0 +%WER 39.14 [ 5043 / 12886, 536 ins, 1172 del, 3335 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode_test/wer_11_0.0 +%WER 42.05 [ 5088 / 12101, 525 ins, 1282 del, 3281 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it1/wer_11_0.0 +%WER 41.41 [ 5011 / 12101, 461 ins, 1345 del, 3205 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it3/wer_11_0.5 +%WER 40.97 [ 4958 / 12101, 485 ins, 1279 del, 3194 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it6/wer_11_0.5 +%WER 38.79 [ 4998 / 12886, 512 ins, 1194 del, 3292 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it1/wer_11_0.0 +%WER 38.16 [ 4917 / 12886, 544 ins, 1128 del, 3245 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it3/wer_11_0.0 +%WER 37.68 [ 4856 / 12886, 564 ins, 1068 del, 3224 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it6/wer_11_0.0 +%WER 70.85 [ 8574 / 12101, 414 ins, 2596 del, 5564 sub ] exp/mono/decode_devel/wer_9_0.0 +%WER 68.17 [ 8785 / 12886, 413 ins, 2704 del, 5668 sub ] exp/mono/decode_test/wer_9_0.0 +%WER 44.05 [ 5330 / 12101, 560 ins, 1467 del, 3303 sub ] exp/sgmm2/decode_devel/wer_10_0.0 +%WER 40.22 [ 5183 / 12886, 680 ins, 1142 del, 3361 sub ] exp/sgmm2/decode_test/wer_9_0.0 +%WER 54.39 [ 6582 / 12101, 695 ins, 1595 del, 4292 sub ] exp/tri1/decode_devel/wer_10_0.0 +%WER 51.60 [ 6649 / 12886, 630 ins, 1706 del, 4313 sub ] exp/tri1/decode_test/wer_11_0.0 +%WER 51.53 [ 6236 / 12101, 659 ins, 1675 del, 3902 sub ] exp/tri2/decode_devel/wer_11_0.0 +%WER 48.32 [ 6226 / 12886, 643 ins, 1669 del, 3914 sub ] exp/tri2/decode_test/wer_12_0.0 +%WER 47.15 [ 5706 / 12101, 580 ins, 1537 del, 3589 sub ] exp/tri3/decode_devel/wer_13_0.0 +%WER 52.13 [ 6308 / 12101, 623 ins, 1706 del, 3979 sub ] exp/tri3/decode_devel.si/wer_11_0.5 +%WER 43.71 [ 5632 / 12886, 594 ins, 1538 del, 3500 sub ] exp/tri3/decode_test/wer_14_0.0 +%WER 48.21 [ 6212 / 12886, 825 ins, 1358 del, 4029 sub ] exp/tri3/decode_test.si/wer_10_0.0 diff --git a/egs/fame/s5/cmd.sh b/egs/fame/s5/cmd.sh new file mode 120000 index 00000000000..19f7e836644 --- /dev/null +++ b/egs/fame/s5/cmd.sh @@ -0,0 +1 @@ +../../wsj/s5/cmd.sh \ No newline at end of file diff --git a/egs/fame/s5/conf/decode_dnn.config b/egs/fame/s5/conf/decode_dnn.config new file mode 100644 index 00000000000..89dd9929a62 --- /dev/null +++ 
b/egs/fame/s5/conf/decode_dnn.config @@ -0,0 +1,2 @@ +beam=18.0 # beam for decoding. Was 13.0 in the scripts. +lattice_beam=10.0 # this has most effect on size of the lattices. diff --git a/egs/fame/s5/conf/fbank.conf new file mode 100644 index 00000000000..c4b73674cab --- /dev/null +++ b/egs/fame/s5/conf/fbank.conf @@ -0,0 +1,2 @@ +# No non-default options for now. + diff --git a/egs/fame/s5/conf/mfcc.conf new file mode 100644 index 00000000000..7361509099f --- /dev/null +++ b/egs/fame/s5/conf/mfcc.conf @@ -0,0 +1 @@ +--use-energy=false # only non-default option. diff --git a/egs/fame/s5/conf/mfcc_hires.conf new file mode 100644 index 00000000000..434834a6725 --- /dev/null +++ b/egs/fame/s5/conf/mfcc_hires.conf @@ -0,0 +1,10 @@ +# config for high-resolution MFCC features, intended for neural network training +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so + # there might be some information at the low end. +--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600) diff --git a/egs/fame/s5/conf/online_cmvn.conf new file mode 100644 index 00000000000..cbdaf5f281c --- /dev/null +++ b/egs/fame/s5/conf/online_cmvn.conf @@ -0,0 +1 @@ +# configuration file for apply-cmvn-online, used in the script ../local/online/run_online_decoding_nnet2.sh diff --git a/egs/fame/s5/local/fame_data_prep.sh new file mode 100755 index 00000000000..2c2d1e79238 --- /dev/null +++ b/egs/fame/s5/local/fame_data_prep.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright 2015-2016 Sarah Flora Juan +# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal) +# Copyright 2016 Radboud University (Author: Emre Yilmaz) + +# Apache 2.0 + +corpus=$1 +set -e -o pipefail +if [ -z "$corpus" ] ; then + echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech database" + exit 1 +fi +if [ ! -d "$corpus" ] ; then + echo >&2 "The directory $corpus does not exist" +fi + +echo "Preparing train, development and test data" +mkdir -p data data/local data/train data/devel data/test + +for x in train devel test; do + echo "Copy spk2utt, utt2spk, wav.scp, text for $x" + cp $corpus/data/$x/text data/$x/text || exit 1; + cp $corpus/data/$x/spk2utt data/$x/spk2utt || exit 1; + cp $corpus/data/$x/utt2spk data/$x/utt2spk || exit 1; + + # the corpus wav.scp contains physical paths, so we just re-generate + # the file from scratch instead of figuring out how to edit it + for rec in $(awk '{print $1}' $corpus/data/$x/text) ; do + spk=${rec%_*} + filename=$corpus/fame/wav/${x}/${rec:8}.wav + if [ ! -f "$filename" ] ; then + echo >&2 "The file $filename could not be found ($rec)" + exit 1 + fi + # we might want to store physical paths as a general rule + filename=$(readlink -f $filename) + echo "$rec $filename" + done > data/$x/wav.scp + + # fix_data_dir.sh fixes common mistakes (unsorted entries in wav.scp, + # duplicate entries and so on).
Also, it regenerates the spk2utt from + # utt2spk + utils/fix_data_dir.sh data/$x +done + +echo "Copying language model" +if [ -f $corpus/lm/LM_FR_IKN3G ] ; then + gzip -c $corpus/lm/LM_FR_IKN3G > data/local/LM.gz +fi + +echo "Data preparation completed." + diff --git a/egs/fame/s5/local/fame_dict_prep.sh new file mode 100755 index 00000000000..c6530217a67 --- /dev/null +++ b/egs/fame/s5/local/fame_dict_prep.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright 2015-2016 Sarah Flora Juan +# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal) +# Copyright 2016 Radboud University (Author: Emre Yilmaz) + +# Apache 2.0 + +corpus=$1 +if [ -z "$corpus" ] ; then + echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech database" + exit 1 +fi +if [ ! -d "$corpus" ] ; then + echo >&2 "The directory $corpus does not exist" +fi + +mkdir -p data/lang data/local/dict + + +cat $corpus/lexicon/lex.asr $corpus/lexicon/lex.oov > data/local/dict/lexicon.txt +echo "!SIL SIL" >> data/local/dict/lexicon.txt +echo " SPN" >> data/local/dict/lexicon.txt +env LC_ALL=C sort -u -o data/local/dict/lexicon.txt data/local/dict/lexicon.txt +cat data/local/dict/lexicon.txt | \ + perl -ane 'print join("\n", @F[1..$#F]) . "\n"; ' | \ + sort -u | grep -v 'SIL' > data/local/dict/nonsilence_phones.txt + + +touch data/local/dict/extra_questions.txt +touch data/local/dict/optional_silence.txt + +echo "SIL" > data/local/dict/optional_silence.txt +echo "SIL" > data/local/dict/silence_phones.txt +echo "" > data/local/dict/oov.txt + +echo "Dictionary preparation succeeded" diff --git a/egs/fame/s5/local/nnet/run_dnn.sh new file mode 100755 index 00000000000..ca1efa5e0ac --- /dev/null +++ b/egs/fame/s5/local/nnet/run_dnn.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely) +# Copyright 2016 Radboud University (Author: Emre Yilmaz) +# Apache 2.0 + +# This example script trains a DNN on top of fMLLR features. +# The training is done in 3 stages, +# +# 1) RBM pre-training: +# in this unsupervised stage we train a stack of RBMs, +# a good starting point for frame cross-entropy training. +# 2) frame cross-entropy training: +# the objective is to classify frames to correct pdfs. +# 3) sequence-training optimizing sMBR: +# the objective is to emphasize state-sequences with better +# frame accuracy w.r.t. reference alignment. + +# Note: With DNNs in RM, the optimal LMWT is 2-6. Don't be tempted to try acwt's like 0.2, +# the value 0.1 is better both for decoding and sMBR. + +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. + +. ./path.sh ## Source the tools/utils (import the queue.pl) + +set -eu + +# Config: +gmm=exp/tri3 +data_fmllr=data-fmllr-tri3 +stage=0 # resume training with --stage=N +# End of config. +. utils/parse_options.sh +# + +[ !
-e $data_fmllr/test ] && if [ $stage -le 0 ]; then + # Store fMLLR features, so we can train on them easily, + # devel + dir=$data_fmllr/devel + steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ + --transform-dir $gmm/decode_devel \ + $dir data/devel $gmm $dir/log $dir/data + # test + dir=$data_fmllr/test + steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ + --transform-dir $gmm/decode_test \ + $dir data/test $gmm $dir/log $dir/data + # train + dir=$data_fmllr/train + steps/nnet/make_fmllr_feats.sh --nj 10 --cmd "$train_cmd" \ + --transform-dir ${gmm}_ali \ + $dir data/train $gmm $dir/log $dir/data + # split the data : 90% train 10% cross-validation (held-out) + utils/subset_data_dir_tr_cv.sh $dir ${dir}_tr90 ${dir}_cv10 +fi + +if [ $stage -le 1 ]; then + # Pre-train DBN, i.e. a stack of RBMs (small database, smaller DNN) + dir=exp/dnn4b_pretrain-dbn + $cuda_cmd $dir/log/pretrain_dbn.log \ + steps/nnet/pretrain_dbn.sh --hid-dim 2048 --rbm-iter 10 $data_fmllr/train $dir +fi + +if [ $stage -le 2 ]; then + # Train the DNN optimizing per-frame cross-entropy. + dir=exp/dnn4b_pretrain-dbn_dnn + ali=${gmm}_ali + feature_transform=exp/dnn4b_pretrain-dbn/final.feature_transform + dbn=exp/dnn4b_pretrain-dbn/6.dbn + # Train + $cuda_cmd $dir/log/train_nnet.log \ + steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \ + $data_fmllr/train_tr90 $data_fmllr/train_cv10 data/lang $ali $ali $dir + # Decode (reuse HCLG graph) + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \ + $gmm/graph $data_fmllr/devel $dir/decode_devel + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \ + $gmm/graph $data_fmllr/test $dir/decode_test +fi + + +# Sequence training using sMBR criterion, we do Stochastic-GD with per-utterance updates. +# Note: With DNNs in RM, the optimal LMWT is 2-6. Don't be tempted to try acwt's like 0.2, +# the value 0.1 is better both for decoding and sMBR. +dir=exp/dnn4b_pretrain-dbn_dnn_smbr +srcdir=exp/dnn4b_pretrain-dbn_dnn +acwt=0.1 + +if [ $stage -le 3 ]; then + # First we generate lattices and alignments: + steps/nnet/align.sh --nj 20 --cmd "$train_cmd" \ + $data_fmllr/train data/lang $srcdir ${srcdir}_ali + steps/nnet/make_denlats.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \ + $data_fmllr/train data/lang $srcdir ${srcdir}_denlats +fi + +if [ $stage -le 4 ]; then + # Re-train the DNN by 6 iterations of sMBR + steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 6 --acwt $acwt --do-smbr true \ + $data_fmllr/train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir + # Decode + for ITER in 6 3 1; do + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \ + --nnet $dir/${ITER}.nnet --acwt $acwt \ + $gmm/graph $data_fmllr/devel $dir/decode_devel_it${ITER} + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \ + --nnet $dir/${ITER}.nnet --acwt $acwt \ + $gmm/graph $data_fmllr/test $dir/decode_test_it${ITER} + done +fi + +echo Success +exit 0 + +# Getting results [see RESULTS file] +# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done + +# to see how model conversion to nnet2 works, run run_dnn_convert_nnet2.sh at this point. 
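# The --stage flag above (parsed by utils/parse_options.sh) lets this recipe be
# resumed part-way through. An illustrative invocation from egs/fame/s5,
# assuming the fMLLR features and the pre-trained DBN from stages 0-1 already
# exist on disk:
#
#   local/nnet/run_dnn.sh --stage 2
#   for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
#
# The second line is the same WER scan quoted in the comments above and in
# run.sh; it prints the best-scoring wer_* entry for every decode directory.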
+ diff --git a/egs/fame/s5/local/nnet/run_dnn_fbank.sh b/egs/fame/s5/local/nnet/run_dnn_fbank.sh new file mode 100755 index 00000000000..a81449ffbcf --- /dev/null +++ b/egs/fame/s5/local/nnet/run_dnn_fbank.sh @@ -0,0 +1,125 @@ +#!/bin/bash + +# Copyright 2012-2014 Brno University of Technology (Author: Karel Vesely) +# Copyright 2016 Radboud University (Author: Emre Yilmaz) +# Apache 2.0 + +# This example script trains a DNN on top of FBANK features. +# The training is done in 3 stages, +# +# 1) RBM pre-training: +# in this unsupervised stage we train stack of RBMs, +# a good starting point for frame cross-entropy trainig. +# 2) frame cross-entropy training: +# the objective is to classify frames to correct pdfs. +# 3) sequence-training optimizing sMBR: +# the objective is to emphasize state-sequences with better +# frame accuracy w.r.t. reference alignment. + +# Note: With DNNs in RM, the optimal LMWT is 2-6. Don't be tempted to try acwt's like 0.2, +# the value 0.1 is better both for decoding and sMBR. + +. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. + ## This relates to the queue. + +. ./path.sh ## Source the tools/utils (import the queue.pl) + +dev=data-fbank/devel +tst=data-fbank/test +train=data-fbank/train + +dev_original=data/devel +tst_original=data/test +train_original=data/train + +gmm=exp/tri3 + +stage=0 +. utils/parse_options.sh || exit 1; + +set -eu + +# Make the FBANK features +[ ! -e $dev ] && if [ $stage -le 0 ]; then + # Dev set + utils/copy_data_dir.sh $dev_original $dev || exit 1; rm $dev/{cmvn,feats}.scp + steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \ + $dev $dev/log $dev/data || exit 1; + steps/compute_cmvn_stats.sh $dev $dev/log $dev/data || exit 1; + # Test set + utils/copy_data_dir.sh $tst_original $tst || exit 1; rm $tst/{cmvn,feats}.scp + steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \ + $tst $tst/log $tst/data || exit 1; + steps/compute_cmvn_stats.sh $tst $tst/log $tst/data || exit 1; + # Training set + utils/copy_data_dir.sh $train_original $train || exit 1; rm $train/{cmvn,feats}.scp + steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \ + $train $train/log $train/data || exit 1; + steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1; + # Split the training set + utils/subset_data_dir_tr_cv.sh --cv-spk-percent 10 $train ${train}_tr90 ${train}_cv10 +fi + +if [ $stage -le 1 ]; then + # Pre-train DBN, i.e. a stack of RBMs (small database, smaller DNN) + dir=exp/dnn4d-fbank_pretrain-dbn + $cuda_cmd $dir/log/pretrain_dbn.log \ + steps/nnet/pretrain_dbn.sh \ + --cmvn-opts "--norm-means=true --norm-vars=true" \ + --delta-opts "--delta-order=2" --splice 5 \ + --hid-dim 2048 --rbm-iter 10 $train $dir || exit 1; +fi + +if [ $stage -le 2 ]; then + # Train the DNN optimizing per-frame cross-entropy. 
+ dir=exp/dnn4d-fbank_pretrain-dbn_dnn + ali=${gmm}_ali + feature_transform=exp/dnn4d-fbank_pretrain-dbn/final.feature_transform + dbn=exp/dnn4d-fbank_pretrain-dbn/6.dbn + # Train + $cuda_cmd $dir/log/train_nnet.log \ + steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \ + ${train}_tr90 ${train}_cv10 data/lang $ali $ali $dir || exit 1; + # Decode (reuse HCLG graph) + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \ + $gmm/graph $dev $dir/decode_devel || exit 1; + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.1 \ + $gmm/graph $tst $dir/decode_test || exit 1; +fi + + +# Sequence training using sMBR criterion, we do Stochastic-GD with per-utterance updates. +# Note: With DNNs in RM, the optimal LMWT is 2-6. Don't be tempted to try acwt's like 0.2, +# the value 0.1 is better both for decoding and sMBR. +dir=exp/dnn4d-fbank_pretrain-dbn_dnn_smbr +srcdir=exp/dnn4d-fbank_pretrain-dbn_dnn +acwt=0.1 + +if [ $stage -le 3 ]; then + # First we generate lattices and alignments: + steps/nnet/align.sh --nj 20 --cmd "$train_cmd" \ + $train data/lang $srcdir ${srcdir}_ali || exit 1; + steps/nnet/make_denlats.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \ + $train data/lang $srcdir ${srcdir}_denlats || exit 1; +fi + +if [ $stage -le 4 ]; then + # Re-train the DNN by 6 iterations of sMBR + steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 6 --acwt $acwt --do-smbr true \ + $train data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1 + # Decode + for ITER in 6 3 1; do + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \ + --nnet $dir/${ITER}.nnet --acwt $acwt \ + $gmm/graph $dev $dir/decode_devel_it${ITER} || exit 1 + steps/nnet/decode.sh --nj 20 --cmd "$decode_cmd" --config conf/decode_dnn.config \ + --nnet $dir/${ITER}.nnet --acwt $acwt \ + $gmm/graph $tst $dir/decode_test_it${ITER} || exit 1 + done +fi + +echo Success +exit 0 + +# Getting results [see RESULTS file] +# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done diff --git a/egs/fame/s5/local/score.sh b/egs/fame/s5/local/score.sh new file mode 120000 index 00000000000..0afefc3158c --- /dev/null +++ b/egs/fame/s5/local/score.sh @@ -0,0 +1 @@ +../steps/score_kaldi.sh \ No newline at end of file diff --git a/egs/fame/s5/local/wer_hyp_filter b/egs/fame/s5/local/wer_hyp_filter new file mode 100755 index 00000000000..372d1a9c73a --- /dev/null +++ b/egs/fame/s5/local/wer_hyp_filter @@ -0,0 +1,2 @@ +#!/bin/sed -f +s:::g diff --git a/egs/fame/s5/local/wer_output_filter b/egs/fame/s5/local/wer_output_filter new file mode 100755 index 00000000000..372d1a9c73a --- /dev/null +++ b/egs/fame/s5/local/wer_output_filter @@ -0,0 +1,2 @@ +#!/bin/sed -f +s:::g diff --git a/egs/fame/s5/local/wer_ref_filter b/egs/fame/s5/local/wer_ref_filter new file mode 100755 index 00000000000..372d1a9c73a --- /dev/null +++ b/egs/fame/s5/local/wer_ref_filter @@ -0,0 +1,2 @@ +#!/bin/sed -f +s:::g diff --git a/egs/fame/s5/path.sh b/egs/fame/s5/path.sh new file mode 100755 index 00000000000..2d17b17a84a --- /dev/null +++ b/egs/fame/s5/path.sh @@ -0,0 +1,6 @@ +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! 
-f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C diff --git a/egs/fame/s5/run.sh b/egs/fame/s5/run.sh new file mode 100755 index 00000000000..26a8485ff7d --- /dev/null +++ b/egs/fame/s5/run.sh @@ -0,0 +1,127 @@ +#!/bin/bash + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +stage=0 +feat_nj=10 +train_nj=10 +decode_nj=10 +famecorpus=./corpus + +if [ -d $famecorpus ] ; then + echo "Fame corpus present. OK." +elif [ -f ./fame.tar.gz ] ; then + echo "Unpacking..." + tar xzf fame.tar.gz +elif [ ! -d $famecorpus ] && [ ! -f ./fame.tar.gz ] ; then + echo "The Fame! corpus is not present. Please register here: http://www.ru.nl/clst/datasets/ " + echo " and download the corpus and put it at $famecorpus" && exit 1 +fi + +numLeavesTri1=5000 +numGaussTri1=25000 +numLeavesMLLT=5000 +numGaussMLLT=25000 +numLeavesSAT=5000 +numGaussSAT=25000 +numGaussUBM=800 +numLeavesSGMM=10000 +numGaussSGMM=20000 + +if [ $stage -le 1 ]; then + local/fame_data_prep.sh $famecorpus || exit 1; + local/fame_dict_prep.sh $famecorpus || exit 1; + utils/prepare_lang.sh data/local/dict "" data/local/lang data/lang || exit 1; + utils/format_lm.sh data/lang data/local/LM.gz data/local/dict/lexicon.txt data/lang_test || exit 1; +fi + +if [ $stage -le 2 ]; then + # Feature extraction + for x in train devel test; do + steps/make_mfcc.sh --nj $feat_nj --cmd "$train_cmd" data/$x exp/make_mfcc/$x mfcc || exit 1; + steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x mfcc || exit 1; + done +fi + +if [ $stage -le 3 ]; then + ### Monophone + echo "Starting monophone training." + steps/train_mono.sh --nj $train_nj --cmd "$train_cmd" data/train data/lang exp/mono || exit 1; + echo "Mono training done." + + echo "Decoding the development and test sets using monophone models." + utils/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph || exit 1; + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/mono/graph data/devel exp/mono/decode_devel || exit 1; + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/mono/graph data/test exp/mono/decode_test || exit 1; + echo "Monophone decoding done." +fi + + +if [ $stage -le 4 ]; then + ### Triphone + echo "Starting triphone training." + steps/align_si.sh --nj $train_nj --cmd "$train_cmd" data/train data/lang exp/mono exp/mono_ali || exit 1; + steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" $numLeavesTri1 $numGaussTri1 data/train data/lang exp/mono_ali exp/tri1 || exit 1; + echo "Triphone training done." + + echo "Decoding the development and test sets using triphone models." + utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph || exit 1; + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri1/graph data/devel exp/tri1/decode_devel || exit 1; + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri1/graph data/test exp/tri1/decode_test || exit 1; + echo "Triphone decoding done." +fi + +if [ $stage -le 5 ]; then + ### Triphone + LDA and MLLT + echo "Starting LDA+MLLT training." + steps/align_si.sh --nj $train_nj --cmd "$train_cmd" data/train data/lang exp/tri1 exp/tri1_ali || exit 1; + steps/train_lda_mllt.sh --cmd "$train_cmd" --splice-opts "--left-context=3 --right-context=3" $numLeavesMLLT $numGaussMLLT data/train data/lang exp/tri1_ali exp/tri2 || exit 1; + echo "LDA+MLLT training done." + + echo "Decoding the development and test sets using LDA+MLLT models." 
+ utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1; + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri2/graph data/devel exp/tri2/decode_devel || exit 1; + steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri2/graph data/test exp/tri2/decode_test || exit 1; + echo "LDA+MLLT decoding done." +fi + + +if [ $stage -le 6 ]; then + ### Triphone + LDA and MLLT + SAT and FMLLR + echo "Starting SAT+FMLLR training." + steps/align_si.sh --nj $train_nj --cmd "$train_cmd" --use-graphs true data/train data/lang exp/tri2 exp/tri2_ali || exit 1; + steps/train_sat.sh --cmd "$train_cmd" $numLeavesSAT $numGaussSAT data/train data/lang exp/tri2_ali exp/tri3 || exit 1; + echo "SAT+FMLLR training done." + + echo "Decoding the development and test sets using SAT+FMLLR models." + utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph || exit 1; + steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri3/graph data/devel exp/tri3/decode_devel || exit 1; + steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" exp/tri3/graph data/test exp/tri3/decode_test || exit 1; + echo "SAT+FMLLR decoding done." +fi + + +if [ $stage -le 7 ]; then + echo "Starting SGMM training." + steps/align_fmllr.sh --nj $train_nj --cmd "$train_cmd" data/train data/lang exp/tri3 exp/tri3_ali || exit 1; + steps/train_ubm.sh --cmd "$train_cmd" $numGaussUBM data/train data/lang exp/tri3_ali exp/ubm || exit 1; + steps/train_sgmm2.sh --cmd "$train_cmd" $numLeavesSGMM $numGaussSGMM data/train data/lang exp/tri3_ali exp/ubm/final.ubm exp/sgmm2 || exit 1; + echo "SGMM training done." + + echo "Decoding the development and test sets using SGMM models" + utils/mkgraph.sh data/lang_test exp/sgmm2 exp/sgmm2/graph || exit 1; + steps/decode_sgmm2.sh --nj $decode_nj --cmd "$decode_cmd" --transform-dir exp/tri3/decode_devel exp/sgmm2/graph data/devel exp/sgmm2/decode_devel || exit 1; + steps/decode_sgmm2.sh --nj $decode_nj --cmd "$decode_cmd" --transform-dir exp/tri3/decode_test exp/sgmm2/graph data/test exp/sgmm2/decode_test || exit 1; + echo "SGMM decoding done." +fi + +if [ $stage -le 8 ]; then + echo "Starting DNN training and decoding." 
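  # run_dnn_fbank.sh (partially shown above) does RBM pre-training, frame
  # cross-entropy training on top of the DBN, and sMBR sequence training via
  # steps/nnet/train_mpe.sh --do-smbr true, decoding with --acwt 0.1;
  # run_dnn.sh is assumed to follow the same pattern on fMLLR features.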
+ local/nnet/run_dnn.sh || exit 1; + local/nnet/run_dnn_fbank.sh || exit 1; +fi + +#score +for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done diff --git a/egs/fame/s5/steps b/egs/fame/s5/steps new file mode 120000 index 00000000000..6e99bf5b5ad --- /dev/null +++ b/egs/fame/s5/steps @@ -0,0 +1 @@ +../../wsj/s5/steps \ No newline at end of file diff --git a/egs/fame/s5/utils b/egs/fame/s5/utils new file mode 120000 index 00000000000..b240885218f --- /dev/null +++ b/egs/fame/s5/utils @@ -0,0 +1 @@ +../../wsj/s5/utils \ No newline at end of file diff --git a/egs/fisher_callhome_spanish/s5/run.sh b/egs/fisher_callhome_spanish/s5/run.sh index 380a8aec936..ad650cd390e 100755 --- a/egs/fisher_callhome_spanish/s5/run.sh +++ b/egs/fisher_callhome_spanish/s5/run.sh @@ -256,7 +256,7 @@ steps/train_mmi_sgmm2.sh \ ( utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph -steps/decode_fmllr_extra.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ +steps/decode_fmllr_extra.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --config conf/decode.config --scoring-opts "--min-lmwt 8 --max-lmwt 12"\ exp/tri5a/graph data/dev exp/tri5a/decode_dev utils/mkgraph.sh data/lang_test exp/sgmm5 exp/sgmm5/graph @@ -274,9 +274,9 @@ done dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \ - --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 --mem 2G") + --parallel-opts "--num-threads 16" --cmd "queue.pl --mem 2G") dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1 \ - --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 --mem 2G") + --parallel-opts "--gpu 1" --cmd "queue.pl --mem 2G") steps/nnet2/train_pnorm_ensemble.sh \ --mix-up 5000 --initial-learning-rate 0.008 --final-learning-rate 0.0008\ @@ -287,7 +287,7 @@ steps/nnet2/train_pnorm_ensemble.sh \ data/train data/lang exp/tri5a_ali exp/tri6a_dnn ( - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " -pe smp 4" \ + steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \ --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev exp/tri5a/graph data/dev exp/tri6a_dnn/decode_dev ) & wait diff --git a/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh b/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh index eae5f7b8581..210d0f5646f 100755 --- a/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh +++ b/egs/fisher_english/s5/local/nnet2/run_6c_gpu.sh @@ -21,7 +21,7 @@ EOF . utils/parse_options.sh -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. ( if [ "$USER" == dpovey ]; then diff --git a/egs/fisher_english/s5/local/online/run_nnet2.sh b/egs/fisher_english/s5/local/online/run_nnet2.sh index 0b9adb7d315..de4d56bb52e 100755 --- a/egs/fisher_english/s5/local/online/run_nnet2.sh +++ b/egs/fisher_english/s5/local/online/run_nnet2.sh @@ -21,7 +21,7 @@ If you want to use GPUs (and have them), go to src/, and configure and make on a where "nvcc" is installed. 
EOF fi -parallel_opts="-l gpu=1" +parallel_opts="--gpu 1" num_threads=1 minibatch_size=512 dir=exp/nnet2_online/nnet_a diff --git a/egs/fisher_english/s5/local/online/run_nnet2_b.sh b/egs/fisher_english/s5/local/online/run_nnet2_b.sh index 7eac7cf0a7d..e1491a10c0b 100755 --- a/egs/fisher_english/s5/local/online/run_nnet2_b.sh +++ b/egs/fisher_english/s5/local/online/run_nnet2_b.sh @@ -19,22 +19,22 @@ set -e if $use_gpu; then if ! cuda-compiled; then - cat <0){ seen[$1]=1; } } +cat $text | awk -v lex=$lexicon 'BEGIN{while((getline0){ seen[$1]=1; } } {for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf(" ");} } printf("\n");}' \ > $cleantext || exit 1; @@ -75,7 +59,7 @@ train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1; train_lm.sh --arpa --lmtype 4gram-mincount $dir || exit 1; # note: output is -# data/local/lm/3gram-mincount/lm_unpruned.gz +# data/local/lm/3gram-mincount/lm_unpruned.gz exit 0 @@ -97,7 +81,7 @@ cat $dir/word_map | awk '{print $1}' | cat - <(echo ""; echo "" ) > $sdir ngram-count -text $sdir/train -order 3 -limit-vocab -vocab $sdir/wordlist -unk \ -map-unk "" -kndiscount -interpolate -lm $sdir/srilm.o3g.kn.gz -ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/heldout +ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/heldout # data/local/lm/srilm/srilm.o3g.kn.gz: line 71: warning: non-zero probability for in closed-vocabulary LM # file data/local/lm/srilm/heldout: 10000 sentences, 78998 words, 0 OOVs @@ -106,7 +90,7 @@ ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/heldout # Note: perplexity SRILM gives to Kaldi-LM model is similar to what kaldi-lm reports above. # Difference in WSJ must have been due to different treatment of . -ngram -lm $dir/3gram-mincount/lm_unpruned.gz -ppl $sdir/heldout +ngram -lm $dir/3gram-mincount/lm_unpruned.gz -ppl $sdir/heldout # data/local/lm/srilm/srilm.o3g.kn.gz: line 71: warning: non-zero probability for in closed-vocabulary LM # file data/local/lm/srilm/heldout: 10000 sentences, 78998 words, 0 OOVs diff --git a/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh index 4afa867503a..324061aa5ac 100644 --- a/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh @@ -8,7 +8,7 @@ set -o pipefail # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, # since the lattice generation runs in about real-time, so takes of the order of # 1000 hours of CPU time. -# +# . ./cmd.sh @@ -38,27 +38,21 @@ dir=${srcdir}_${criterion} ## Egs options frames_per_eg=150 frames_overlap_per_eg=30 -truncate_deriv_weights=10 ## Nnet training options effective_learning_rate=0.00000125 max_param_change=1 num_jobs_nnet=4 num_epochs=2 -regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options +regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -adjust_priors=true # May need to be set to false - # because it does not help in some setups -modify_learning_rates=true -last_layer_factor=0.1 - ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. if $use_gpu; then if ! cuda-compiled; then - cat <" + echo "Usage: $0 " + echo "e.g.: $0 /export/corpora/LDC/LDC2007S10" echo "See comments in the script for more details" exit 1 fi @@ -19,7 +20,7 @@ sdir=$1 [ ! 
-d $sdir/data/references/eval03/english/cts ] \ && echo Expecting directory $tdir/data/references/eval03/english/cts to be present && exit 1; -. path.sh +. path.sh dir=data/local/rt03 mkdir -p $dir @@ -37,7 +38,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1; awk -v sph2pipe=$sph2pipe '{ - printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); + printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2); }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1; #side A - channel 1, side B - channel 2 @@ -47,7 +48,7 @@ awk -v sph2pipe=$sph2pipe '{ # sw02001-A_000098-001156 sw02001-A 0.98 11.56 #pem=$sdir/english/hub5e_00.pem #[ ! -f $pem ] && echo "No such file $pem" && exit 1; -# pem file has lines like: +# pem file has lines like: # en_4156 A unknown_speaker 301.85 302.48 #grep -v ';;' $pem \ @@ -59,7 +60,7 @@ cat $tdir/*.stm | grep -v ';;' | grep -v inter_segment_gap \ | sort -u > $dir/segments # stm file has lines like: -# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER +# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER # TODO(arnab): We should really be lowercasing this since the Edinburgh # recipe uses lowercase. This is not used in the actual scoring. #grep -v ';;' $tdir/reference/hub5e00.english.000405.stm \ @@ -77,7 +78,7 @@ cat $tdir/*.stm | \ grep -v inter_segment_gap | \ awk '{ printf $1; if ($1==";;") printf(" %s",$2); else printf(($2==1)?" A":" B"); for(n=3;n<=NF;n++) printf(" %s", $n); print ""; }'\ - > $dir/stm + > $dir/stm #$tdir/reference/hub5e00.english.000405.stm > $dir/stm cp $rtroot/data/trans_rules/en20030506.glm $dir/glm @@ -87,10 +88,10 @@ cp $rtroot/data/trans_rules/en20030506.glm $dir/glm echo "Segments from pem file and stm file do not match." && exit 1; grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text - + # create an utt2spk file that assumes each conversation side is # a separate speaker. -awk '{print $1,$2;}' $dir/segments > $dir/utt2spk +awk '{print $1,$2;}' $dir/segments > $dir/utt2spk utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt # cp $dir/segments $dir/segments.tmp @@ -110,4 +111,3 @@ done echo Data preparation and formatting completed for RT-03 echo "(but not MFCC extraction)" - diff --git a/egs/fisher_swbd/s5/path.sh b/egs/fisher_swbd/s5/path.sh index e14c6074f6b..2d17b17a84a 100755 --- a/egs/fisher_swbd/s5/path.sh +++ b/egs/fisher_swbd/s5/path.sh @@ -1,6 +1,6 @@ -export KALDI_ROOT=`pwd`/../../../ -export PWD=`pwd` -export PATH=$KALDI_ROOT/src/ivectorbin:$PWD/stanford-utils:$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$KALDI_ROOT/tools/kaldi_lm:$KALDI_ROOT/tools/srilm/bin:$KALDI_ROOT/tools/srilm/bin/i686-m64:$PATH +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . 
$KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C diff --git a/egs/fisher_swbd/s5/run.sh b/egs/fisher_swbd/s5/run.sh index 5addefc5fe1..8b1af972647 100755 --- a/egs/fisher_swbd/s5/run.sh +++ b/egs/fisher_swbd/s5/run.sh @@ -7,6 +7,7 @@ mfccdir=mfcc set -e rescore=true + # prepare fisher data and put it under data/train_fisher local/fisher_data_prep.sh /export/corpora3/LDC/LDC2004T19 /export/corpora3/LDC/LDC2005T19 \ /export/corpora3/LDC/LDC2004S13 /export/corpora3/LDC/LDC2005S13 @@ -40,7 +41,7 @@ for f in spk2utt utt2spk wav.scp text segments reco2file_and_channel; do done # LM for train_all -local/fisher_train_lms.sh +local/fisher_train_lms.sh #local/fisher_create_test_lang.sh # Compiles G for trigram LM LM=data/local/lm/3gram-mincount/lm_unpruned.gz @@ -58,7 +59,7 @@ fi #local/eval2000_data_prep.sh /scail/group/deeplearning/speech/datasets/LDC2002S09/hub5e_00/ /scail/group/deeplearning/speech/datasets/LDC2002T43 || exit 1 local/eval2000_data_prep.sh /export/corpora/LDC/LDC2002S09/hub5e_00 /export/corpora/LDC/LDC2002T43 || exit 1 - + #local/rt03_data_prep.sh /scail/group/deeplearning/speech/datasets/rt_03 || exit 1 local/rt03_data_prep.sh /export/corpora/LDC/LDC2007S10 || exit 1 @@ -66,6 +67,12 @@ utils/fix_data_dir.sh data/train_all # Make MFCCs for the training set +# spread the mfccs over various machines, as this data-set is quite large. +if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then + mfcc=$(basename $mfccdir) # in case was absolute pathname (unlikely), get basename. + utils/create_split_dir.pl /export/b{05,06,07,08}/$USER/kaldi-data/egs/fisher_swbd/s5/$mfcc/storage \ + $mfccdir/storage +fi steps/make_mfcc.sh --nj 100 --cmd "$train_cmd" data/train_all exp/make_mfcc/train_all $mfccdir || exit 1; utils/fix_data_dir.sh data/train_all utils/validate_data_dir.sh data/train_all @@ -111,31 +118,31 @@ utils/data/remove_dup_utts.sh 200 data/train_30k data/train_30k_nodup utils/data/remove_dup_utts.sh 200 data/train_100k data/train_100k_nodup utils/data/remove_dup_utts.sh 300 data/train data/train_nodup -# The next commands are not necessary for the scripts to run, but increase -# efficiency of data access by putting the mfcc's of the subset +# The next commands are not necessary for the scripts to run, but increase +# efficiency of data access by putting the mfcc's of the subset # in a contiguous place in a file. -( . path.sh; +( . path.sh; # make sure mfccdir is defined as above.. - cp data/train_10k_nodup/feats.scp{,.bak} + cp data/train_10k_nodup/feats.scp{,.bak} copy-feats scp:data/train_10k_nodup/feats.scp ark,scp:$mfccdir/kaldi_fish_10k_nodup.ark,$mfccdir/kaldi_fish_10k_nodup.scp \ && cp $mfccdir/kaldi_fish_10k_nodup.scp data/train_10k_nodup/feats.scp ) -( . path.sh; +( . path.sh; # make sure mfccdir is defined as above.. - cp data/train_30k_nodup/feats.scp{,.bak} + cp data/train_30k_nodup/feats.scp{,.bak} copy-feats scp:data/train_30k_nodup/feats.scp ark,scp:$mfccdir/kaldi_fish_30k_nodup.ark,$mfccdir/kaldi_fish_30k_nodup.scp \ && cp $mfccdir/kaldi_fish_30k_nodup.scp data/train_30k_nodup/feats.scp ) -( . path.sh; +( . path.sh; # make sure mfccdir is defined as above.. 
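  # copy-feats with an "ark,scp:" output writes one contiguous archive plus a
  # matching .scp index whose entries point at byte offsets, e.g. (hypothetical
  # utterance id)
  #   fsh_100k-A_000123 /path/to/mfcc/kaldi_fish_100k_nodup.ark:12345
  # so replacing feats.scp with that index gives fast sequential reads.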
- cp data/train_100k_nodup/feats.scp{,.bak} + cp data/train_100k_nodup/feats.scp{,.bak} copy-feats scp:data/train_100k_nodup/feats.scp ark,scp:$mfccdir/kaldi_fish_100k_nodup.ark,$mfccdir/kaldi_fish_100k_nodup.scp \ && cp $mfccdir/kaldi_fish_100k_nodup.scp data/train_100k_nodup/feats.scp ) # Start training on the Switchboard subset, which has cleaner alignments steps/train_mono.sh --nj 3 --cmd "$train_cmd" \ - data/train_10k_nodup data/lang_nosp exp/mono0a + data/train_10k_nodup data/lang_nosp exp/mono0a steps/align_si.sh --nj 10 --cmd "$train_cmd" \ data/train_30k_nodup data/lang_nosp exp/mono0a exp/mono0a_ali || exit 1; @@ -171,8 +178,8 @@ steps/align_si.sh --nj 50 --cmd "$train_cmd" \ steps/train_deltas.sh --cmd "$train_cmd" \ 5500 90000 data/train_100k_nodup data/lang_nosp exp/tri1b_ali exp/tri2 || exit 1; #used to be 2500 20000 on 30k -( - graph_dir=exp/tri2/graph_nosp_fsh_sw1_tg +( + graph_dir=exp/tri2/graph_nosp_fsh_sw1_tg utils/mkgraph.sh data/lang_nosp_fsh_sw1_tg exp/tri2 $graph_dir || exit 1; steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ $graph_dir data/eval2000 exp/tri2/decode_eval2000_nosp_fsh_sw1_tg || exit 1; @@ -180,7 +187,7 @@ steps/train_deltas.sh --cmd "$train_cmd" \ $graph_dir data/rt03 exp/tri2/decode_rt03_nosp_fsh_sw1_tg || exit 1; )& -# Train tri3a, the last speaker-independent triphone stage, +# Train tri3a, the last speaker-independent triphone stage, # on the whole Switchboard training set steps/align_si.sh --nj 100 --cmd "$train_cmd" \ data/train_swbd data/lang_nosp exp/tri2 exp/tri2_ali || exit 1; @@ -189,8 +196,8 @@ steps/train_deltas.sh --cmd "$train_cmd" \ 11500 200000 data/train_swbd data/lang_nosp exp/tri2_ali exp/tri3a || exit 1; #used to be 2500 20000 -( - graph_dir=exp/tri3a/graph_nosp_fsh_sw1_tg +( + graph_dir=exp/tri3a/graph_nosp_fsh_sw1_tg utils/mkgraph.sh data/lang_nosp_fsh_sw1_tg exp/tri3a $graph_dir || exit 1; steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ $graph_dir data/eval2000 exp/tri3a/decode_eval2000_nosp_fsh_sw1_tg || exit 1; @@ -205,8 +212,8 @@ steps/align_si.sh --nj 100 --cmd "$train_cmd" \ steps/train_lda_mllt.sh --cmd "$train_cmd" \ --splice-opts "--left-context=3 --right-context=3" \ 11500 400000 data/train_nodup data/lang_nosp exp/tri3a_ali exp/tri3b || exit 1; -( - graph_dir=exp/tri3b/graph_nosp_fsh_sw1_tg +( + graph_dir=exp/tri3b/graph_nosp_fsh_sw1_tg utils/mkgraph.sh data/lang_nosp_fsh_sw1_tg exp/tri3b $graph_dir || exit 1; steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ $graph_dir data/eval2000 exp/tri3b/decode_eval2000_nosp_fsh_sw1_tg || exit 1; @@ -232,16 +239,16 @@ if [ $rescore ]; then utils/build_const_arpa_lm.sh $LM_fg data/lang data/lang_fsh_sw1_fg fi -( +( graph_dir=exp/tri3b/graph_fsh_sw1_tg utils/mkgraph.sh data/lang_fsh_sw1_tg exp/tri3b $graph_dir || exit 1; steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ $graph_dir data/eval2000 exp/tri3b/decode_eval2000_fsh_sw1_tg || exit 1; steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ $graph_dir data/rt03 exp/tri3b/decode_rt03_fsh_sw1_tg || exit 1; -) & +)& -# Next we'll use fMLLR and train with SAT (i.e. on +# Next we'll use fMLLR and train with SAT (i.e. 
on # fMLLR features) steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \ @@ -250,7 +257,7 @@ steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \ steps/train_sat.sh --cmd "$train_cmd" \ 11500 800000 data/train_nodup data/lang exp/tri3b_ali exp/tri4a || exit 1; -( +( graph_dir=exp/tri4a/graph_fsh_sw1_tg utils/mkgraph.sh data/lang_fsh_sw1_tg exp/tri4a $graph_dir steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ @@ -272,11 +279,10 @@ fi steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \ data/train_nodup data/lang exp/tri4a exp/tri4a_ali || exit 1; - steps/train_sat.sh --cmd "$train_cmd" \ 11500 1600000 data/train_nodup data/lang exp/tri4a_ali exp/tri5a || exit 1; -( +( graph_dir=exp/tri5a/graph_fsh_sw1_tg utils/mkgraph.sh data/lang_fsh_sw1_tg exp/tri5a $graph_dir steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ @@ -308,7 +314,7 @@ steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \ steps/train_sat.sh --cmd "$train_cmd" \ 11500 3200000 data/train_nodup data/lang exp/tri5a_ali exp/tri6a || exit 1; -( +( graph_dir=exp/tri6a/graph_fsh_sw1_tg utils/mkgraph.sh data/lang_fsh_sw1_tg exp/tri6a $graph_dir steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ @@ -331,9 +337,6 @@ fi #steps/align_fmllr.sh --nj 200 --cmd "$train_cmd" \ # data/train_nodup data/lang exp/tri6a exp/tri6a_ali || exit 1; - -# # The following is the current online-nnet2 recipe, with "multi-splice". +# The following is the current online-nnet2 recipe, with "multi-splice". # local/online/run_nnet2_ms.sh local/online/run_nnet2_ms.sh - - diff --git a/egs/gale_arabic/s5/local/online/run_nnet2.sh b/egs/gale_arabic/s5/local/online/run_nnet2.sh index 8ccbda5a8dc..0db62242459 100644 --- a/egs/gale_arabic/s5/local/online/run_nnet2.sh +++ b/egs/gale_arabic/s5/local/online/run_nnet2.sh @@ -18,23 +18,23 @@ decode_nj=30 if $use_gpu; then if ! cuda-compiled; then - cat <) { + @F = split " "; + print $F[0] . " "; + foreach $s (@F[1..$#F]) { + if (($s =~ /\[.*\]/) || ($s =~ /\<.*\>/) || ($s =~ "!SIL")) { + print " $s"; + } else { + @chars = split "", $s; + foreach $c (@chars) { + if ($c =~ /\p{InCJK_Unified_Ideographs}/) { + print " $c"; + } else { + print "$c"; + } + } + } + print " "; + } + print "\n"; +} + + diff --git a/egs/hkust/s5/local/hkust_data_prep.sh b/egs/hkust/s5/local/hkust_data_prep.sh index 07f3c9677d8..207f03af36b 100755 --- a/egs/hkust/s5/local/hkust_data_prep.sh +++ b/egs/hkust/s5/local/hkust_data_prep.sh @@ -104,8 +104,8 @@ awk '{ segment=$1; split(segment,S,"-"); side=S[2]; audioname=S[1];startf=S[3];e print segment " " audioname "-" side " " startf/100 " " endf/100}' <$dev_dir/text > $dev_dir/segments awk '{name = $0; gsub(".sph$","",name); gsub(".*/","",name); print(name " " $0)}' $dev_dir/sph.flist > $dev_dir/sph.scp -sph2pipe=`cd ../../..; echo $PWD/tools/sph2pipe_v2.5/sph2pipe` -[ ! -f $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; +sph2pipe=`which sph2pipe` || sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe +[ ! 
-x $sph2pipe ] && echo "Could not find the sph2pipe program at $sph2pipe" && exit 1; cat $train_dir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);}' | \ @@ -136,5 +136,4 @@ cat $dev_dir/segments | awk '{spk=substr($1,1,33); print $1 " " spk}' > $dev_dir cat $dev_dir/utt2spk | sort -k 2 | utils/utt2spk_to_spk2utt.pl > $dev_dir/spk2utt || exit 1; echo "$0: HKUST data preparation succeeded" - -exit; +exit 0 diff --git a/egs/hkust/s5/local/hkust_prepare_dict.sh b/egs/hkust/s5/local/hkust_prepare_dict.sh index 5cd864c52cc..6aca37586ed 100755 --- a/egs/hkust/s5/local/hkust_prepare_dict.sh +++ b/egs/hkust/s5/local/hkust_prepare_dict.sh @@ -312,5 +312,4 @@ cat $dict_dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", cat - $dict_dir/lexicon1.txt > $dict_dir/lexicon.txt || exit 1; echo "$0: HKUST dict preparation succeeded" - -exit; +exit 0; diff --git a/egs/hkust/s5/local/hkust_train_lms.sh b/egs/hkust/s5/local/hkust_train_lms.sh index d6d0b2aa0bc..8520bb26d2d 100755 --- a/egs/hkust/s5/local/hkust_train_lms.sh +++ b/egs/hkust/s5/local/hkust_train_lms.sh @@ -19,9 +19,13 @@ done dir=data/local/lm mkdir -p $dir +export LC_ALL=C # You'll get errors about things being not sorted, if you + # have a different locale. kaldi_lm=`which train_lm.sh` if [ ! -x $kaldi_lm ]; then - echo "train_lm.sh is not found. Checkout tools/extra/install_kaldi_lm.sh" + echo "$0: train_lm.sh is not found. That might mean it's not installed" + echo "$0: or it is not added to PATH" + echo "$0: Use the script tools/extra/install_kaldi_lm.sh to install it" exit 1 fi diff --git a/egs/hkust/s5/local/online/run_nnet2_ms.sh b/egs/hkust/s5/local/online/run_nnet2_ms.sh index b935d86fa90..c3177e1136e 100755 --- a/egs/hkust/s5/local/online/run_nnet2_ms.sh +++ b/egs/hkust/s5/local/online/run_nnet2_ms.sh @@ -20,7 +20,7 @@ If you want to use GPUs (and have them), go to src/, and configure and make on a where "nvcc" is installed. Otherwise, call this script with --use-gpu false EOF fi - parallel_opts="-l gpu=1" + parallel_opts="--gpu 1" num_threads=1 minibatch_size=512 # the _a is in case I want to change the parameters. @@ -29,7 +29,7 @@ else # almost the same, but this may be a little bit slow. 
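  # New-style queue options (understood by queue.pl and the other
  # parallelization wrappers, which map them to the back-end's native flags)
  # replace the raw SGE flags; roughly:
  #   -l gpu=1                    ->  --gpu 1
  #   -pe smp 16                  ->  --num-threads 16
  #   -l mem_free=2G,ram_free=2G  ->  --mem 2G
  #   -tc 5                       ->  --max-jobs-run 5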
num_threads=16 minibatch_size=128 - parallel_opts="-pe smp $num_threads" + parallel_opts="--num-threads $num_threads" fi # Run the common stages of training, including training the iVector extractor diff --git a/egs/hkust/s5/local/score.sh b/egs/hkust/s5/local/score.sh deleted file mode 120000 index df664a0f1f1..00000000000 --- a/egs/hkust/s5/local/score.sh +++ /dev/null @@ -1 +0,0 @@ -../steps/scoring/score_kaldi_cer.sh \ No newline at end of file diff --git a/egs/hkust/s5/local/score.sh b/egs/hkust/s5/local/score.sh new file mode 100755 index 00000000000..766eaf3cd44 --- /dev/null +++ b/egs/hkust/s5/local/score.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -e -o pipefail +set -x +steps/score_kaldi.sh "$@" +steps/score_kaldi_cer.sh --stage 2 "$@" + +echo "$0: Done" diff --git a/egs/hkust/s5/local/wer_output_filter b/egs/hkust/s5/local/wer_output_filter new file mode 100755 index 00000000000..aceeeec41b4 --- /dev/null +++ b/egs/hkust/s5/local/wer_output_filter @@ -0,0 +1,25 @@ +#!/usr/bin/env perl +# Copyright 2012-2014 Johns Hopkins University (Author: Yenda Trmal) +# Apache 2.0 +use utf8; + +use open qw(:encoding(utf8)); +binmode STDIN, ":utf8"; +binmode STDOUT, ":utf8"; +binmode STDERR, ":utf8"; + +while (<>) { + @F = split " "; + print $F[0] . " "; + foreach $s (@F[1..$#F]) { + if (($s =~ /\[.*\]/) || ($s =~ /\<.*\>/) || ($s =~ "!SIL")) { + print ""; + } else { + print "$s" + } + print " "; + } + print "\n"; +} + + diff --git a/egs/iban/s5/local/prepare_lm.sh b/egs/iban/s5/local/prepare_lm.sh index a19dc18f566..10d5e276aa3 100755 --- a/egs/iban/s5/local/prepare_lm.sh +++ b/egs/iban/s5/local/prepare_lm.sh @@ -10,7 +10,7 @@ set -e -o pipefail local/train_lms_srilm.sh --train-text data/train/text data/ data/srilm -nl -nrz -w10 corpus/LM/iban-bp-2012.txt | sort -R > data/local/external_text +nl -nrz -w10 corpus/LM/iban-bp-2012.txt | utils/shuffle_list.pl > data/local/external_text local/train_lms_srilm.sh --train-text data/local/external_text data/ data/srilm_external # let's do ngram interpolation of the previous two LMs @@ -21,7 +21,7 @@ for w in 0.9 0.8 0.7 0.6 0.5; do ngram -lm data/srilm/lm.gz -mix-lm data/srilm_external/lm.gz \ -lambda $w -write-lm data/srilm_interp/lm.${w}.gz echo -n "data/srilm_interp/lm.${w}.gz " - ngram -lm data/srilm_interp/lm.${w}.gz -ppl data/srilm/dev.txt | paste -s + ngram -lm data/srilm_interp/lm.${w}.gz -ppl data/srilm/dev.txt | paste -s - done | sort -k15,15g > data/srilm_interp/perplexities.txt # for basic decoding, let's use only a trigram LM diff --git a/egs/iban/s5/local/train_lms_srilm.sh b/egs/iban/s5/local/train_lms_srilm.sh index 9ed88842650..f72596e750a 100755 --- a/egs/iban/s5/local/train_lms_srilm.sh +++ b/egs/iban/s5/local/train_lms_srilm.sh @@ -206,9 +206,9 @@ echo "--------------------" echo "Computing perplexity" echo "--------------------" ( - for f in $tgtdir/2gram* ; do ( echo $f; ngram -order 2 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done - for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done - for f in $tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' ; done + for f in $tgtdir/2gram* ; do ( echo $f; ngram -order 2 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done + for f in $tgtdir/3gram* ; do ( echo $f; ngram -order 3 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done + for f in 
$tgtdir/4gram* ; do ( echo $f; ngram -order 4 -lm $f -unk -map-unk "$oov_symbol" -ppl $tgtdir/dev.txt ) | paste -s -d ' ' - ; done ) | sort -r -n -k 15,15g | column -t | tee $tgtdir/perplexities.txt echo "The perlexity scores report is stored in $tgtdir/perplexities.txt " diff --git a/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh b/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh index bda883f16c2..aeb0a7164e2 100755 --- a/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh +++ b/egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh @@ -10,7 +10,7 @@ set -e # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, # since the lattice generation runs in about real-time, so takes of the order of # 1000 hours of CPU time. -# +# stage=0 @@ -44,7 +44,6 @@ dir=${srcdir}_${criterion} ## Egs options frames_per_eg=150 frames_overlap_per_eg=30 -truncate_deriv_weights=10 ## Nnet training options effective_learning_rate=0.000001 @@ -59,8 +58,8 @@ decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we deci if $use_gpu; then if ! cuda-compiled; then - cat </dev/null || true data_dirs= - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do steps/shift_feats.sh --cmd "$train_cmd --max-jobs-run 40" --nj 350 \ $x $train_data_dir exp/shift_hires mfcc_hires utils/fix_data_dir.sh ${train_data_dir}_fs$x @@ -103,7 +102,7 @@ if [ $frame_subsampling_factor -ne 1 ]; then awk -v nfs=$x '{print "fs"nfs"-"$0}' $train_ivector_dir/ivector_online.scp >> ${train_ivector_dir}_fs/ivector_online.scp done utils/combine_data.sh ${train_data_dir}_fs $data_dirs - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do rm -r ${train_data_dir}_fs$x done fi @@ -112,9 +111,9 @@ if [ $frame_subsampling_factor -ne 1 ]; then affix=_fs fi - + rm ${train_ivector_dir}_fs/ivector_online.scp 2>/dev/null || true -for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do +for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do awk -v nfs=$x '{print "fs"nfs"-"$0}' $train_ivector_dir/ivector_online.scp >> ${train_ivector_dir}_fs/ivector_online.scp done train_ivector_dir=${train_ivector_dir}_fs @@ -133,7 +132,7 @@ fi if [ -z "$lats_dir" ]; then lats_dir=${srcdir}_denlats${affix} if [ $stage -le 2 ]; then - nj=50 + nj=50 # this doesn't really affect anything strongly, except the num-jobs for one of # the phases of get_egs_discriminative.sh below. 
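  # (Sketch of this stage:) make_denlats.sh decodes the training data with the
  # current model to produce denominator lattices; the discriminative objective
  # (e.g. sMBR) later contrasts these against the numerator alignments in
  # ${srcdir}_ali${affix} used below. num_threads_denlats sets the threads per
  # decoding job.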
num_threads_denlats=6 @@ -147,16 +146,13 @@ if [ -z "$lats_dir" ]; then fi fi -model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` -model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` +model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` +model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` left_context=$[model_left_context + extra_left_context] right_context=$[model_right_context + extra_right_context] -valid_left_context=$[valid_left_context + frames_per_eg] -valid_right_context=$[valid_right_context + frames_per_eg] - -cmvn_opts=`cat $srcdir/cmvn_opts` +cmvn_opts=`cat $srcdir/cmvn_opts` if [ -z "$degs_dir" ]; then degs_dir=${srcdir}_degs${affix} @@ -169,16 +165,13 @@ if [ -z "$degs_dir" ]; then # have a higher maximum num-jobs if if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi - degs_opts="--determinize true --minimize true --remove-output-symbols true --remove-epsilons true --collapse-transition-ids true" - steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ --adjust-priors false --acwt 1.0 \ --online-ivector-dir $train_ivector_dir \ --left-context $left_context --right-context $right_context \ - --valid-left-context $valid_left_context --valid-right-context $valid_right_context \ - --priors-left-context $valid_left_context --priors-right-context $valid_right_context $frame_subsampling_opt \ - --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg ${degs_opts} \ + $frame_subsampling_opt \ + --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \ $train_data_dir $lang ${srcdir}_ali${affix} $lats_dir $srcdir/final.mdl $degs_dir ; fi fi @@ -191,7 +184,7 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" --use-frame-shift false \ - --truncate-deriv-weights $truncate_deriv_weights --adjust-priors false \ + --adjust-priors false \ ${degs_dir} $dir ; fi @@ -202,7 +195,7 @@ if [ $stage -le 5 ]; then ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` iter=epoch$[x*frame_subsampling_factor] - + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ --acwt 1.0 --post-decode-acwt 10.0 \ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ @@ -219,7 +212,7 @@ if [ $stage -le 5 ]; then done done wait - [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 fi if [ $stage -le 6 ] && $cleanup; then @@ -231,4 +224,3 @@ fi exit 0; - diff --git a/egs/librispeech/s5/local/nnet2/run_5c.sh b/egs/librispeech/s5/local/nnet2/run_5c.sh index bf261b93910..956a8f09348 100755 --- a/egs/librispeech/s5/local/nnet2/run_5c.sh +++ b/egs/librispeech/s5/local/nnet2/run_5c.sh @@ -1,7 +1,7 @@ #!/bin/bash # This is neural net training on top of adapted 40-dimensional features. -# +# train_stage=-10 use_gpu=true @@ -16,8 +16,8 @@ test_sets="dev-clean dev-other" if $use_gpu; then if ! cuda-compiled; then - cat < # stage to do partial re-run from." 
echo " --num-gselect # Number of Gaussians per frame to" echo " # limit computation to, for speed" - echo " --subsample # In main E-M phase, use every n" + echo " --subsample # In main E-M phase, use every n" echo " # frames (a speedup)" echo " --num-frames # Maximum num-frames to keep in memory" echo " # for model initialization" @@ -59,7 +59,7 @@ if [ $# != 3 ]; then echo " # in initialization phase (then split)" echo " --num-threads # number of threads to use in initialization" echo " # phase (must match with parallel-opts option)" - echo " --parallel-opts # Option should match number of threads in" + echo " --parallel-opts # Option should match number of threads in" echo " # --num-threads option above" echo " --min-gaussian-weight # min Gaussian weight allowed in GMM" echo " # initialization (this relatively high" diff --git a/egs/lre/v1/lid/train_ivector_extractor.sh b/egs/lre/v1/lid/train_ivector_extractor.sh index 8e238985f99..18f536a60cb 100755 --- a/egs/lre/v1/lid/train_ivector_extractor.sh +++ b/egs/lre/v1/lid/train_ivector_extractor.sh @@ -13,7 +13,7 @@ # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -24,8 +24,8 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 @@ -84,7 +84,7 @@ nj_full=$[$nj*$num_processes] sdata=$data/split$nj_full; utils/split_data.sh $data $nj_full || exit 1; -parallel_opts="-pe smp $[$num_threads*$num_processes]" +parallel_opts="--num-threads $[$num_threads*$num_processes]" ## Set up features. feats="ark,s,cs:apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 scp:$sdata/JOB/feats.scp ark:- | add-deltas-sdc ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -97,7 +97,7 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1 -fi +fi # Do Gaussian selection and posterior extracion @@ -146,7 +146,7 @@ while [ $x -lt $num_iters ]; do nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. 
- $cmd -pe smp $nt $dir/log/update.$x.log \ + $cmd --num-threads $nt $dir/log/update.$x.log \ ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; rm $dir/acc.$x.* if $cleanup; then diff --git a/egs/lre/v1/run.sh b/egs/lre/v1/run.sh index 740fad7aceb..bc0f8db572d 100755 --- a/egs/lre/v1/run.sh +++ b/egs/lre/v1/run.sh @@ -50,9 +50,9 @@ rm foo local/split_long_utts.sh --max-utt-len 120 data/train_unsplit data/train # This commented script is an alternative to the above utterance -# splitting method. Here we split the utterance based on the number of +# splitting method. Here we split the utterance based on the number of # frames which are voiced, rather than the total number of frames. -# max_voiced=3000 +# max_voiced=3000 # local/vad_split_utts.sh --max-voiced $max_voiced data/train_unsplit $mfccdir data/train use_vtln=true @@ -61,7 +61,7 @@ if $use_vtln; then cp -rt data/${t} data/${t}_novtln rm -r data/${t}_novtln/{split,.backup,spk2warp} 2>/dev/null || true steps/make_mfcc.sh --mfcc-config conf/mfcc_vtln.conf --nj 100 --cmd "$train_cmd" \ - data/${t}_novtln exp/make_mfcc $mfccdir + data/${t}_novtln exp/make_mfcc $mfccdir lid/compute_vad_decision.sh data/${t}_novtln exp/make_mfcc $mfccdir done # Vtln-related things: @@ -115,7 +115,7 @@ lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd" data/train \ # Alternatively, a diagonal UBM can replace the full UBM used above. # The preceding calls to train_diag_ubm.sh and train_full_ubm.sh # can be commented out and replaced with the following lines. -# +# # This results in a slight degradation but could improve error rate when # there is less training data than used in this example. # @@ -125,12 +125,12 @@ lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd" data/train \ #gmm-global-to-fgmm exp/diag_ubm_2048/final.dubm \ # exp/full_ubm_2048/final.ubm -lid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=2G,ram_free=2G" \ +lid/train_ivector_extractor.sh --cmd "$train_cmd --mem 2G" \ --num-iters 5 exp/full_ubm_2048/final.ubm data/train \ exp/extractor_2048 -lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +lid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048 data/train exp/ivectors_train -lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +lid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048 data/lre07 exp/ivectors_lre07 diff --git a/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh b/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh index 972348766b5..aeced4fb273 100755 --- a/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh +++ b/egs/lre07/v1/lid/init_full_ubm_from_dnn.sh @@ -12,6 +12,7 @@ nj=40 cmd="run.pl" stage=-2 +cleanup=true # End configuration section. @@ -77,4 +78,11 @@ $cmd $dir/log/init.log \ "fgmm-global-sum-accs - $dir/stats.*.acc |" $num_components \ $dir/final.ubm || exit 1; +if $cleanup; then + echo "$0: removing stats" + for g in $(seq $nj); do + rm $dir/stats.$g.acc || exit 1 + done +fi + exit 0; diff --git a/egs/lre07/v1/lid/nnet2/get_egs2.sh b/egs/lre07/v1/lid/nnet2/get_egs2.sh index 27cf82bd1a1..7806dce4894 100755 --- a/egs/lre07/v1/lid/nnet2/get_egs2.sh +++ b/egs/lre07/v1/lid/nnet2/get_egs2.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). # 2015 David Snyder # Apache 2.0. 
# @@ -54,7 +54,7 @@ transform_dir= # If supplied, overrides alidir as the place to find fMLLR tr postdir= # If supplied, we will use posteriors in it as soft training targets. stage=0 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. random_copy=false online_ivector_dir= # can be used if we are including speaker information as iVectors. @@ -83,7 +83,7 @@ if [ $# != 3 ]; then echo " # very end." echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." - + exit 1; fi @@ -109,7 +109,7 @@ utils/split_data.sh $data $nj mkdir -p $dir/log $dir/info cp $alidir/tree $dir -# Get list of validation utterances. +# Get list of validation utterances. awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \ > $dir/valid_uttlist || exit 1; @@ -129,7 +129,7 @@ awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlis [ -z "$transform_dir" ] && transform_dir=$alidir -## Set up features. +## Set up features. if [ -z $feat_type ]; then if [ -f $alidir/final.mat ] && [ ! -f $transform_dir/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi fi @@ -140,7 +140,7 @@ case $feat_type in valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" ;; - lda) + lda) splice_opts=`cat $alidir/splice_opts 2>/dev/null` # caution: the top-level nnet training script should copy these to its own dir now. cp $alidir/{splice_opts,final.mat} $dir || exit 1; @@ -280,13 +280,13 @@ if [ $stage -le 3 ]; then egs_list="$egs_list ark:$dir/egs_orig.$n.JOB.ark" done echo "$0: Generating training examples on disk" - # The examples will go round-robin to egs_list. + # The examples will go round-robin to egs_list. if [ ! -z $postdir ]; then $cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \ nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \ scp:$postdir/post.JOB.scp ark:- \| \ nnet-copy-egs ark:- $egs_list || exit 1; - else + else $cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \ nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \ "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \ @@ -299,7 +299,7 @@ if [ $stage -le 4 ]; then # shuffle the order, writing to the egs.JOB.ark egs_list= - for n in $(seq $nj); do + for n in $(seq $nj); do egs_list="$egs_list $dir/egs_orig.JOB.$n.ark" done diff --git a/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh b/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh index 4809f42e633..533001934ab 100755 --- a/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh +++ b/egs/lre07/v1/lid/nnet2/train_multisplice_accel2.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). # 2013 Xiaohui Zhang # 2013 Guoguo Chen # 2014 Vimal Manohar @@ -9,7 +9,7 @@ # This is a modified version of train_multisplice_accel2.sh in # steps/nnet2/ for language recognition. The main difference is -# that it uses different get_lda.sh and get_egs2.sh scripts. +# that it uses different get_lda.sh and get_egs2.sh scripts. 
# # The original train_multisplice_accel2.sh was a modified version of # train_pnorm_multisplice2.sh (still using pnorm). The "accel" refers to the @@ -25,11 +25,11 @@ num_epochs=15 # Number of epochs of training; initial_effective_lrate=0.01 final_effective_lrate=0.001 bias_stddev=0.5 -pnorm_input_dim=3000 +pnorm_input_dim=3000 pnorm_output_dim=300 minibatch_size=128 # by default use a smallish minibatch size for neural net # training; this controls instability which would otherwise - # be a problem with multi-threaded update. + # be a problem with multi-threaded update. samples_per_iter=400000 # each iteration of training, see this many samples # per job. This option is passed to get_egs.sh @@ -66,7 +66,7 @@ splice_indexes="layer0/-4:-3:-2:-1:0:1:2:3:4 layer2/-5:-1:3" # so hidden layer indexing is different from component count -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. These don't randprune=4.0 # speeds up LDA. alpha=4.0 # relates to preconditioning. update_period=4 # relates to online preconditioning: says how often we update the subspace. @@ -78,11 +78,11 @@ precondition_rank_out=80 # relates to online preconditioning mix_up=0 # Number of components to mix up to (should be > #tree leaves, if # specified.) num_threads=16 -parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" +parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know. # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads. combine_num_threads=8 -combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage. +combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage. cleanup=true egs_dir= lda_opts= @@ -92,7 +92,7 @@ transform_dir= # If supplied, overrides alidir feat_type= # Can be used to force "raw" features. align_cmd= # The cmd that is passed to steps/nnet2/align.sh align_use_gpu= # Passed to use_gpu in steps/nnet2/align.sh [yes/no] -realign_times= # List of times on which we realign. Each time is +realign_times= # List of times on which we realign. Each time is # floating point number strictly between 0 and 1, which # will be multiplied by the num-iters to get an iteration # number. @@ -127,10 +127,10 @@ if [ $# != 4 ]; then echo " --num-threads # Number of parallel threads per job (will affect results" echo " # as well as speed; may interact with batch size; if you increase" echo " # this, you may want to decrease the batch size." - echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" - echo " # use multiple threads... note, you might have to reduce mem_free,ram_free" - echo " # versus your defaults, because it gets multiplied by the -pe smp argument." - echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." + echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" + echo " # use multiple threads... note, you might have to reduce --mem" + echo " # versus your defaults, because it gets multiplied by the --num-threads argument." + echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." echo " --minibatch-size # Size of minibatch to process (note: product with --num-threads" echo " # should not get too large, e.g. >2k)." 
echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per" @@ -148,7 +148,7 @@ if [ $# != 4 ]; then echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." - + exit 1; fi @@ -372,7 +372,7 @@ while [ $x -lt $num_iters ]; do ilr=$initial_effective_lrate; flr=$final_effective_lrate; np=$num_archives_processed; nt=$num_archives_to_process; this_learning_rate=$(perl -e "print (($x + 1 >= $num_iters ? $flr : $ilr*exp($np*log($flr/$ilr)/$nt))*$this_num_jobs);"); - echo "On iteration $x, learning rate is $this_learning_rate." + echo "On iteration $x, learning rate is $this_learning_rate." if [ ! -z "${realign_this_iter[$x]}" ]; then prev_egs_dir=$cur_egs_dir @@ -417,7 +417,7 @@ while [ $x -lt $num_iters ]; do steps/nnet2/remove_egs.sh $prev_egs_dir fi fi - + # Set off jobs doing some diagnostics, in the background. # Use the egs dir from the previous iteration for the diagnostics $cmd $dir/log/compute_prob_valid.$x.log \ @@ -461,7 +461,7 @@ while [ $x -lt $num_iters ]; do ( # this sub-shell is so that when we "wait" below, # we only wait for the training jobs that we just spawned, # not the diagnostic jobs that we spawned above. - + # We can't easily use a single parallel SGE job to do the main training, # because the computation of which archive and which --frame option # to use for each job is a little complex, so we spawn each one separately. @@ -500,7 +500,7 @@ while [ $x -lt $num_iters ]; do n=$(perl -e '($nj,$pat)=@ARGV; $best_n=1; $best_logprob=-1.0e+10; for ($n=1;$n<=$nj;$n++) { $fn = sprintf($pat,$n); open(F, "<$fn") || die "Error opening log file $fn"; undef $logprob; while () { if (m/log-prob-per-frame=(\S+)/) { $logprob=$1; } } - close(F); if (defined $logprob && $logprob > $best_logprob) { $best_logprob=$logprob; + close(F); if (defined $logprob && $logprob > $best_logprob) { $best_logprob=$logprob; $best_n=$n; } } print "$best_n\n"; ' $num_jobs_nnet $dir/log/train.$x.%d.log) || exit 1; [ -z "$n" ] && echo "Error getting best model" && exit 1; cp $dir/$[$x+1].$n.mdl $dir/$[$x+1].mdl || exit 1; @@ -537,7 +537,7 @@ if [ $stage -le $num_iters ]; then cur_offset=0 # current offset from first_model_combine. for n in $(seq $max_models_combine); do next_offset=$[($n*$num_models_combine)/$max_models_combine] - sub_list="" + sub_list="" for o in $(seq $cur_offset $[$next_offset-1]); do iter=$[$first_model_combine+$o] mdl=$dir/$iter.mdl diff --git a/egs/lre07/v1/lid/train_diag_ubm.sh b/egs/lre07/v1/lid/train_diag_ubm.sh index 60f2452f3b7..a5e256818ce 100755 --- a/egs/lre07/v1/lid/train_diag_ubm.sh +++ b/egs/lre07/v1/lid/train_diag_ubm.sh @@ -29,7 +29,7 @@ cleanup=true min_gaussian_weight=0.0001 remove_low_count_gaussians=true # set this to false if you need #gauss to stay fixed. num_threads=32 -parallel_opts="-pe smp 32" +parallel_opts="--num-threads 32" # End configuration section. echo "$0 $@" # Print the command line for logging @@ -49,7 +49,7 @@ if [ $# != 3 ]; then echo " --stage # stage to do partial re-run from." 
echo " --num-gselect # Number of Gaussians per frame to" echo " # limit computation to, for speed" - echo " --subsample # In main E-M phase, use every n" + echo " --subsample # In main E-M phase, use every n" echo " # frames (a speedup)" echo " --num-frames # Maximum num-frames to keep in memory" echo " # for model initialization" @@ -59,7 +59,7 @@ if [ $# != 3 ]; then echo " # in initialization phase (then split)" echo " --num-threads # number of threads to use in initialization" echo " # phase (must match with parallel-opts option)" - echo " --parallel-opts # Option should match number of threads in" + echo " --parallel-opts # Option should match number of threads in" echo " # --num-threads option above" echo " --min-gaussian-weight # min Gaussian weight allowed in GMM" echo " # initialization (this relatively high" @@ -129,10 +129,11 @@ for x in `seq 0 $[$num_iters-1]`; do $cmd $dir/log/update.$x.log \ gmm-global-est $opt --min-gaussian-weight=$min_gaussian_weight $dir/$x.dubm "gmm-global-sum-accs - $dir/$x.*.acc|" \ $dir/$[$x+1].dubm || exit 1; - rm $dir/$x.*.acc $dir/$x.dubm + $cleanup && rm $dir/$x.*.acc $dir/$x.dubm fi done -rm $dir/gselect.*.gz +$cleanup && rm $dir/gselect.*.gz + mv $dir/$num_iters.dubm $dir/final.dubm || exit 1; exit 0; diff --git a/egs/lre07/v1/lid/train_ivector_extractor.sh b/egs/lre07/v1/lid/train_ivector_extractor.sh index 8e238985f99..55bd54bb275 100755 --- a/egs/lre07/v1/lid/train_ivector_extractor.sh +++ b/egs/lre07/v1/lid/train_ivector_extractor.sh @@ -13,7 +13,7 @@ # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -24,8 +24,8 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 @@ -84,7 +84,7 @@ nj_full=$[$nj*$num_processes] sdata=$data/split$nj_full; utils/split_data.sh $data $nj_full || exit 1; -parallel_opts="-pe smp $[$num_threads*$num_processes]" +parallel_opts="--num-threads $[$num_threads*$num_processes]" ## Set up features. 
feats="ark,s,cs:apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 scp:$sdata/JOB/feats.scp ark:- | add-deltas-sdc ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -97,7 +97,7 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1 -fi +fi # Do Gaussian selection and posterior extracion @@ -135,27 +135,25 @@ while [ $x -lt $num_iters ]; do done wait [ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1; - accs="" - for j in $(seq $nj); do - accs+="$dir/acc.$x.$j " - done - echo "Summing accs (pass $x)" - $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ - ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; + accs="" + for j in $(seq $nj); do + accs+="$dir/acc.$x.$j " + done + echo "Summing accs (pass $x)" + $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ + ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; echo "Updating model (pass $x)" nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. - $cmd -pe smp $nt $dir/log/update.$x.log \ - ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; - rm $dir/acc.$x.* - if $cleanup; then - rm $dir/acc.$x - # rm $dir/$x.ie - fi + $cmd --num-threads $nt $dir/log/update.$x.log \ + ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; + rm $dir/acc.$x.* + $cleanup && rm $dir/acc.$x $dir/$x.ie fi x=$[$x+1] done +$cleanup && rm $dir/post.*.gz rm $dir/final.ie 2>/dev/null ln -s $x.ie $dir/final.ie diff --git a/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh b/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh index 7464ce5faea..573258e7b88 100755 --- a/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh +++ b/egs/lre07/v1/lid/train_ivector_extractor_dnn.sh @@ -9,16 +9,16 @@ # This script trains the i-vector extractor using a DNN-based UBM. It also requires # an fGMM, created by the script lid/init_full_gmm_from_dnn.sh. -# Note: there are 3 separate levels of parallelization: num_threads, num_processes, -# and num_jobs. This may seem a bit excessive. It has to do with minimizing -# memory usage and disk I/O, subject to various constraints. The "num_threads" +# Note: there are 3 separate levels of parallelization: num_threads, num_processes, +# and num_jobs. This may seem a bit excessive. It has to do with minimizing +# memory usage and disk I/O, subject to various constraints. The "num_threads" # is how many threads a program uses; the "num_processes" is the number of separate # processes a single job spawns, and then sums the accumulators in memory. # Our recommendation: # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -29,8 +29,8 @@ # may want more jobs, though. # Begin configuration section. 
-nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 @@ -95,9 +95,9 @@ utils/split_data.sh $data $nj_full || exit 1; sdata_dnn=$data_dnn/split$nj_full; utils/split_data.sh $data_dnn $nj_full || exit 1; - -parallel_opts="-pe smp $[$num_threads*$num_processes]" + +parallel_opts="--num-threads $[$num_threads*$num_processes]" # Set up features. @@ -114,7 +114,7 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1; -fi +fi # Do Gaussian selection and posterior extracion @@ -153,24 +153,21 @@ while [ $x -lt $num_iters ]; do done wait [ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1; - accs="" - for j in $(seq $nj); do - accs+="$dir/acc.$x.$j " - done - echo "Summing accs (pass $x)" - $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ - ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; - echo "Updating model (pass $x)" - nt=$[$num_threads*$num_processes] # use the same number of threads that - # each accumulation process uses, since we - # can be sure the queue will support this many. - $cmd -pe smp $nt $dir/log/update.$x.log \ - ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; - rm $dir/acc.$x.* - if $cleanup; then - rm $dir/acc.$x - # rm $dir/$x.ie - fi + accs="" + for j in $(seq $nj); do + accs+="$dir/acc.$x.$j " + done + echo "Summing accs (pass $x)" + $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ + ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; + echo "Updating model (pass $x)" + nt=$[$num_threads*$num_processes] # use the same number of threads that + # each accumulation process uses, since we + # can be sure the queue will support this many. + $cmd --num-threads $nt $dir/log/update.$x.log \ + ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; + rm $dir/acc.$x.* + $cleanup && rm $dir/acc.$x $dir/$x.ie fi x=$[$x+1] done diff --git a/egs/lre07/v1/run.sh b/egs/lre07/v1/run.sh index a4ff4d909ba..8664494e558 100755 --- a/egs/lre07/v1/run.sh +++ b/egs/lre07/v1/run.sh @@ -127,12 +127,12 @@ utils/subset_data_dir.sh data/train 5000 data/train_5k utils/subset_data_dir.sh data/train 10000 data/train_10k -lid/train_diag_ubm.sh --nj 30 --cmd "$train_cmd -l mem_free=20G,ram_free=20G" \ +lid/train_diag_ubm.sh --nj 30 --cmd "$train_cmd --mem 20G" \ data/train_5k 2048 exp/diag_ubm_2048 -lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd -l mem_free=20G,ram_free=20G" \ +lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd --mem 20G" \ data/train_10k exp/diag_ubm_2048 exp/full_ubm_2048_10k -lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ +lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd --mem 35G" \ data/train exp/full_ubm_2048_10k exp/full_ubm_2048 # Alternatively, a diagonal UBM can replace the full UBM used above. 
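# Note the staging above: the diagonal UBM is trained on the 5k-utterance
# subset for speed, the full-covariance UBM is then initialized from it on the
# 10k subset, and finally re-estimated on the whole training set.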
@@ -148,7 +148,7 @@ lid/train_full_ubm.sh --nj 30 --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ #gmm-global-to-fgmm exp/diag_ubm_2048/final.dubm \ # exp/full_ubm_2048/final.ubm -lid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ +lid/train_ivector_extractor.sh --cmd "$train_cmd --mem 35G" \ --use-weights true \ --num-iters 5 exp/full_ubm_2048/final.ubm data/train \ exp/extractor_2048 @@ -162,10 +162,10 @@ utils/fix_data_dir.sh data/train_lr echo "**Language count for logistic regression training (after splitting long utterances):**" awk '{print $2}' data/train_lr/utt2lang | sort | uniq -c | sort -nr -lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +lid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048 data/train_lr exp/ivectors_train -lid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +lid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048 data/lre07 exp/ivectors_lre07 lid/run_logistic_regression.sh --prior-scale 0.70 \ diff --git a/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh b/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh index a223e12333f..51fcf401cb2 100755 --- a/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh +++ b/egs/lre07/v2/local/dnn/run_nnet2_multisplice.sh @@ -19,13 +19,13 @@ set -e # assume use_gpu=true since it would be way too slow otherwise. if ! cuda-compiled; then - cat <" + echo "Usage: $0 " + echo "e.g.: $0 /export/corpora/LDC/LDC2007S10" echo "See comments in the script for more details" exit 1 fi @@ -45,7 +46,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1; awk -v sph2pipe=$sph2pipe '{ - printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); + printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2); }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1; #side A - channel 1, side B - channel 2 @@ -55,7 +56,7 @@ awk -v sph2pipe=$sph2pipe '{ # sw02001-A_000098-001156 sw02001-A 0.98 11.56 #pem=$sdir/english/hub5e_00.pem #[ ! -f $pem ] && echo "No such file $pem" && exit 1; -# pem file has lines like: +# pem file has lines like: # en_4156 A unknown_speaker 301.85 302.48 #grep -v ';;' $pem \ @@ -67,7 +68,7 @@ cat $tdir/*.stm | grep -v ';;' | grep -v inter_segment_gap \ | sort -u > $dir/segments # stm file has lines like: -# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER +# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER # TODO(arnab): We should really be lowercasing this since the Edinburgh # recipe uses lowercase. This is not used in the actual scoring. #grep -v ';;' $tdir/reference/hub5e00.english.000405.stm \ @@ -85,7 +86,7 @@ cat $tdir/*.stm | \ grep -v inter_segment_gap | \ awk '{ printf $1; if ($1==";;") printf(" %s",$2); else printf(($2==1)?" A":" B"); for(n=3;n<=NF;n++) printf(" %s", $n); print ""; }'\ - > $dir/stm + > $dir/stm #$tdir/reference/hub5e00.english.000405.stm > $dir/stm cp $rtroot/data/trans_rules/en20030506.glm $dir/glm @@ -95,10 +96,10 @@ cp $rtroot/data/trans_rules/en20030506.glm $dir/glm echo "Segments from pem file and stm file do not match." && exit 1; grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text - + # create an utt2spk file that assumes each conversation side is # a separate speaker. 
-awk '{print $1,$2;}' $dir/segments > $dir/utt2spk +awk '{print $1,$2;}' $dir/segments > $dir/utt2spk utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt # cp $dir/segments $dir/segments.tmp @@ -118,4 +119,3 @@ done echo Data preparation and formatting completed for RT-03 echo "(but not MFCC extraction)" - diff --git a/egs/rm/s5/local/chain/run_tdnn_5g.sh b/egs/rm/s5/local/chain/run_tdnn_5g.sh index f6fbe070763..088cb3ec778 100755 --- a/egs/rm/s5/local/chain/run_tdnn_5g.sh +++ b/egs/rm/s5/local/chain/run_tdnn_5g.sh @@ -120,7 +120,7 @@ if [ $stage -le 8 ]; then --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ --trainer.optimization.final-effective-lrate $final_effective_lrate \ --trainer.max-param-change $max_param_change \ - --cleanup.remove-egs true \ + --cleanup.remove-egs $remove_egs \ --feat-dir data/train \ --tree-dir $treedir \ --lat-dir exp/tri3b_lats \ diff --git a/egs/rm/s5/local/chain/run_tdnn_5n.sh b/egs/rm/s5/local/chain/run_tdnn_5n.sh index 7fd7b82aa1d..7a08becd57f 100755 --- a/egs/rm/s5/local/chain/run_tdnn_5n.sh +++ b/egs/rm/s5/local/chain/run_tdnn_5n.sh @@ -25,7 +25,8 @@ num_jobs_final=4 minibatch_size=128 frames_per_eg=150 remove_egs=false - +#common_egs_dir=exp/chain/tdnn_5g/egs/ +common_egs_dir= # End configuration section. echo "$0 $@" # Print the command line for logging @@ -121,7 +122,7 @@ if [ $stage -le 8 ]; then --trainer.optimization.initial-effective-lrate $initial_effective_lrate \ --trainer.optimization.final-effective-lrate $final_effective_lrate \ --trainer.max-param-change $max_param_change \ - --cleanup.remove-egs true \ + --cleanup.remove-egs $remove_egs \ --feat-dir data/train \ --tree-dir $treedir \ --lat-dir exp/tri3b_lats \ diff --git a/egs/rm/s5/local/nnet2/run_4b_gpu.sh b/egs/rm/s5/local/nnet2/run_4b_gpu.sh index 34a5cd34f7e..9cde9f1694e 100755 --- a/egs/rm/s5/local/nnet2/run_4b_gpu.sh +++ b/egs/rm/s5/local/nnet2/run_4b_gpu.sh @@ -16,7 +16,7 @@ If you want to use GPUs (and have them), go to src/, and configure and make on a where "nvcc" is installed. EOF -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. . utils/parse_options.sh # to parse the --stage option, if given diff --git a/egs/rm/s5/local/nnet2/run_4c.sh b/egs/rm/s5/local/nnet2/run_4c.sh index 2b580fe29d6..b3060c46ca0 100755 --- a/egs/rm/s5/local/nnet2/run_4c.sh +++ b/egs/rm/s5/local/nnet2/run_4c.sh @@ -14,20 +14,20 @@ use_gpu=true if $use_gpu; then if ! cuda-compiled; then - cat < foo; @@ -78,20 +78,20 @@ sid/train_full_ubm.sh --nj 30 --remove-low-count-gaussians false --num-iters 1 - data/train_female_4k exp/full_ubm_2048 exp/full_ubm_2048_female & wait -# note, the mem_free,ram_free is counted per thread... in this setup each +# note, the --mem is counted per thread... in this setup each # job has 4 processes running each with 4 threads; each job takes about 5G # of memory so we need about 20G, plus add memory for sum-accs to make it 25G. -# but we'll submit using -pe smp 16, and this multiplies the memory requirement +# but we'll submit using --num-threads 16, and this multiplies the memory requirement # by 16, so submitting with 2G as the requirement, to make the total requirement # 32, is reasonable. # Train the iVector extractor for male speakers. 
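A quick bash check, illustrative only, of the memory arithmetic in the note above (the per-process and per-slot figures are the ones assumed there, not measured by the recipe):

num_processes=4; num_threads=4       # per-job settings in this setup
mem_per_proc_gb=5                    # ~5G per accumulation process
sum_accs_gb=5                        # head-room for the sum-accs step
mem_per_slot_gb=2                    # memory requested per queue slot
slots=$[num_processes*num_threads]                   # 16, passed via --num-threads 16
needed=$[num_processes*mem_per_proc_gb+sum_accs_gb]  # 4*5 + 5 = 25G per job
granted=$[slots*mem_per_slot_gb]                     # 16 * 2G = 32G, above the 25G needed
echo "need ~${needed}G per job; requesting 2G over $slots slots grants ${granted}G"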
-sid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=2G,ram_free=2G" \ +sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 2G" \ --num-iters 5 exp/full_ubm_2048_male/final.ubm data/train_male \ exp/extractor_2048_male # The same for female speakers. -sid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=2G,ram_free=2G" \ +sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 2G" \ --num-iters 5 exp/full_ubm_2048_female/final.ubm data/train_female \ exp/extractor_2048_female @@ -105,22 +105,22 @@ sid/gender_id.sh --cmd "$train_cmd" --nj 150 exp/full_ubm_2048{,_male,_female} \ # Gender-id error rate is 2.58% # Extract the iVectors for the Fisher data. -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_male data/train_male exp/ivectors_train_male -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_female data/train_female exp/ivectors_train_female # .. and for the SRE08 training and test data. (We focus on the main # evaluation condition, the only required one in that eval, which is # the short2-short3 eval.) -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_female data/sre08_train_short2_female exp/ivectors_sre08_train_short2_female -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_male data/sre08_train_short2_male exp/ivectors_sre08_train_short2_male -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_female data/sre08_test_short3_female exp/ivectors_sre08_test_short3_female -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=3G,ram_free=3G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 3G" --nj 50 \ exp/extractor_2048_male data/sre08_test_short3_male exp/ivectors_sre08_test_short3_male @@ -131,7 +131,7 @@ cat $trials | awk '{print $1, $2}' | \ ivector-compute-dot-products - \ scp:exp/ivectors_sre08_train_short2_female/spk_ivector.scp \ scp:exp/ivectors_sre08_test_short3_female/spk_ivector.scp \ - foo + foo local/score_sre08.sh $trials foo diff --git a/egs/sre08/v1/run.sh b/egs/sre08/v1/run.sh index 4e31542bf4d..c4afe447e8d 100755 --- a/egs/sre08/v1/run.sh +++ b/egs/sre08/v1/run.sh @@ -110,12 +110,12 @@ sid/train_full_ubm.sh --nj 30 --remove-low-count-gaussians false \ wait # Train the iVector extractor for male speakers. -sid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ +sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 35G" \ --num-iters 5 exp/full_ubm_2048_male/final.ubm data/train_male \ exp/extractor_2048_male # The same for female speakers. -sid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=35G,ram_free=35G" \ +sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 35G" \ --num-iters 5 exp/full_ubm_2048_female/final.ubm data/train_female \ exp/extractor_2048_female @@ -129,25 +129,25 @@ sid/gender_id.sh --cmd "$train_cmd" --nj 150 exp/full_ubm_2048{,_male,_female} \ # Gender-id error rate is 3.41% # Extract the iVectors for the training data. 
-sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_male data/train_male exp/ivectors_train_male -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_female data/train_female exp/ivectors_train_female # .. and for the SRE08 training and test data. (We focus on the main # evaluation condition, the only required one in that eval, which is # the short2-short3 eval.) -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_female data/sre08_train_short2_female \ exp/ivectors_sre08_train_short2_female -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_male data/sre08_train_short2_male \ exp/ivectors_sre08_train_short2_male -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_female data/sre08_test_short3_female \ exp/ivectors_sre08_test_short3_female -sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" --nj 50 \ +sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 50 \ exp/extractor_2048_male data/sre08_test_short3_male \ exp/ivectors_sre08_test_short3_male diff --git a/egs/sre08/v1/sid/extract_ivectors_dnn.sh b/egs/sre08/v1/sid/extract_ivectors_dnn.sh index 8692e6ee8a5..2687d1fc6c8 100755 --- a/egs/sre08/v1/sid/extract_ivectors_dnn.sh +++ b/egs/sre08/v1/sid/extract_ivectors_dnn.sh @@ -1,7 +1,7 @@ #!/bin/bash # Copyright 2013 Daniel Povey -# 2014-2015 David Snyder +# 2014-2017 David Snyder # 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) # 2015 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0. @@ -16,6 +16,9 @@ stage=0 min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out) posterior_scale=1.0 # This scale helps to control for successive features being highly # correlated. E.g. try 0.1 or 0.3. +use_gpu=true +chunk_size=256 +nnet_job_opt= # End configuration section. echo "$0 $@" # Print the command line for logging @@ -37,6 +40,8 @@ if [ $# != 5 ]; then echo " --num-gselect # Number of Gaussians to select using" echo " # diagonal model." echo " --min-post # Pruning threshold for posteriors" + echo " --nnet-job-opt # Options for the DNN jobs which add to or" + echo " # replace those specified by --cmd" exit 1; fi @@ -46,6 +51,21 @@ data=$3 data_dnn=$4 dir=$5 +gpu_opt="" +if $use_gpu; then + nnet_job_opt="$nnet_job_opt --gpu 1" + gpu_opt="--use-gpu=yes" + if ! cuda-compiled; then + echo "$0: WARNING: you are trying to use the GPU but you have not compiled" + echo " for CUDA. If you have GPUs and have nvcc installed, go to src/" + echo " and do ./configure; make" + exit 1 + fi +else + echo "$0: without using a GPU this will be slow." + gpu_opt="--use-gpu=no" +fi + for f in $srcdir/final.ie $srcdir/final.ubm $data/feats.scp ; do [ ! -f $f ] && echo "No such file $f" && exit 1; done @@ -60,8 +80,6 @@ utils/split_data.sh $data_dnn $nj || exit 1; delta_opts=`cat $srcdir/delta_opts 2>/dev/null` -splice_opts=`cat exp/nnet//splice_opts 2>/dev/null` # frame-splicing options - ## Set up features. 
feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -69,13 +87,18 @@ nnet_feats="ark,s,cs:apply-cmvn-sliding --center=true scp:$sdata_dnn/JOB/feats.s if [ $stage -le 0 ]; then echo "$0: extracting iVectors" - $cmd JOB=1:$nj $dir/log/extract_ivectors.JOB.log \ - nnet-am-compute --apply-log=true $nnet "$nnet_feats" ark:- \ - \| select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- \ - \| logprob-to-post --min-post=$min_post ark:- ark:- \| \ - scale-post ark:- $posterior_scale ark:- \| \ - ivector-extract --verbose=2 $srcdir/final.ie "$feats" ark,s,cs:- \ - ark,scp,t:$dir/ivector.JOB.ark,$dir/ivector.JOB.scp || exit 1; + for g in $(seq $nj); do + $cmd $nnet_job_opt $dir/log/extract_ivectors.$g.log \ + nnet-am-compute $gpu_opt --apply-log=true --chunk-size=${chunk_size} \ + $nnet "`echo $nnet_feats | sed s/JOB/$g/g`" ark:- \ + \| select-voiced-frames ark:- scp,s,cs:$sdata/$g/vad.scp ark:- \ + \| logprob-to-post --min-post=$min_post ark:- ark:- \| \ + scale-post ark:- $posterior_scale ark:- \| \ + ivector-extract --verbose=2 $srcdir/final.ie \ + "`echo $feats | sed s/JOB/$g/g`" ark,s,cs:- \ + ark,scp,t:$dir/ivector.$g.ark,$dir/ivector.$g.scp || exit 1 & + done + wait fi if [ $stage -le 1 ]; then diff --git a/egs/sre08/v1/sid/init_full_ubm_from_dnn.sh b/egs/sre08/v1/sid/init_full_ubm_from_dnn.sh index f6710028ae5..c6b508a7206 100755 --- a/egs/sre08/v1/sid/init_full_ubm_from_dnn.sh +++ b/egs/sre08/v1/sid/init_full_ubm_from_dnn.sh @@ -1,18 +1,23 @@ #!/bin/bash -# Copyright 2015 David Snyder -# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) -# 2015 Johns Hopkins University (Author: Daniel Povey) +# Copyright 2015-2017 David Snyder +# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) +# 2015 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0 # This script derives a full-covariance UBM from DNN posteriors and # speaker recognition features. # Begin configuration section. -nj=40 +nj=8 cmd="run.pl" stage=-2 delta_window=3 delta_order=2 +use_gpu=true +nnet_job_opt= +cleanup=true +chunk_size=256 +stage=0 # End configuration section. echo "$0 $@" # Print the command line for logging @@ -30,15 +35,34 @@ if [ $# != 4 ]; then echo " --nj # number of parallel training jobs" echo " --delta-window # delta window size" echo " --delta-order # delta order" - echo " # to be equal to the size of the DNN output layer." + echo " --use-gpu # Use GPU to extract DNN posteriors" + echo " --chunk-size # Number of frames processed at a time by the DNN" + echo " --nnet-job-opt # Options for the DNN jobs which add to or" + echo " # replace those specified by --cmd" exit 1; fi -data=$1 -data_dnn=$2 +data=$1 # Features for the GMM +data_dnn=$2 # Features for the DNN nnet=$3 dir=$4 +gpu_opt="" +nnet_job_opt="" +if $use_gpu; then + nnet_job_opt="$nnet_job_opt --gpu 1" + gpu_opt="--use-gpu=yes" + if ! cuda-compiled; then + echo "$0: WARNING: you are trying to use the GPU but you have not compiled" + echo " for CUDA. If you have GPUs and have nvcc installed, go to src/" + echo " and do ./configure; make" + exit 1 + fi +else + echo "$0: without using a GPU this will be slow." + gpu_opt="--use-gpu=no" +fi + for f in $data/feats.scp $data/vad.scp ${data_dnn}/feats.scp \ ${data_dnn}/vad.scp $nnet; do @@ -69,16 +93,34 @@ select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" # in the ancillary GMM. 
num_components=`grep -oP 'output-dim\ \K[0-9]+' <(nnet-am-info $nnet 2> /dev/null)` -$cmd JOB=1:$nj $logdir/make_stats.JOB.log \ - nnet-am-compute --apply-log=true $nnet "$nnet_feats" ark:- \ - \| select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- \ - \| logprob-to-post ark:- ark:- \| \ - fgmm-global-acc-stats-post ark:- $num_components "$feats" \ - $dir/stats.JOB.acc || exit 1; +if [ $stage -le 0 ]; then + echo "$0: accumulating stats from DNN posteriors and speaker ID features" + for g in $(seq $nj); do + $cmd $nnet_job_opt $dir/log/make_stats.$g.log \ + nnet-am-compute $gpu_opt \ + --chunk-size=${chunk_size} --apply-log=true $nnet \ + "`echo $nnet_feats | sed s/JOB/$g/g`" \ + ark:- \ + \| select-voiced-frames ark:- scp,s,cs:$sdata/$g/vad.scp ark:- \ + \| logprob-to-post ark:- ark:- \| \ + fgmm-global-acc-stats-post ark:- $num_components \ + "`echo $feats | sed s/JOB/$g/g`" \ + $dir/stats.$g.acc || exit 1 & + done + wait +fi -$cmd $dir/log/init.log \ - fgmm-global-init-from-accs --verbose=2 \ - "fgmm-global-sum-accs - $dir/stats.*.acc |" $num_components \ - $dir/final.ubm || exit 1; +if [ $stage -le 1 ]; then + echo "$0: initializing GMM from stats" + $cmd $dir/log/init.log \ + fgmm-global-init-from-accs --verbose=2 \ + "fgmm-global-sum-accs - $dir/stats.*.acc |" $num_components \ + $dir/final.ubm || exit 1; +fi -exit 0; +if $cleanup; then + echo "$0: removing stats" + for g in $(seq $nj); do + rm $dir/stats.$g.acc || exit 1 + done +fi diff --git a/egs/sre10/v1/local/dnn/get_egs2.sh b/egs/sre08/v1/sid/nnet2/get_egs2.sh similarity index 98% rename from egs/sre10/v1/local/dnn/get_egs2.sh rename to egs/sre08/v1/sid/nnet2/get_egs2.sh index 9f1644178e2..05ea1d1a0cd 100755 --- a/egs/sre10/v1/local/dnn/get_egs2.sh +++ b/egs/sre08/v1/sid/nnet2/get_egs2.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). # 2015 David Snyder # Apache 2.0. # @@ -54,7 +54,7 @@ transform_dir= # If supplied, overrides alidir as the place to find fMLLR tr postdir= # If supplied, we will use posteriors in it as soft training targets. stage=0 -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. random_copy=false online_ivector_dir= # can be used if we are including speaker information as iVectors. @@ -83,7 +83,7 @@ if [ $# != 3 ]; then echo " # very end." echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." - + exit 1; fi @@ -109,7 +109,7 @@ utils/split_data.sh $data $nj mkdir -p $dir/log $dir/info cp $alidir/tree $dir -# Get list of validation utterances. +# Get list of validation utterances. awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \ > $dir/valid_uttlist || exit 1; @@ -129,7 +129,7 @@ awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlis [ -z "$transform_dir" ] && transform_dir=$alidir -## Set up features. +## Set up features. if [ -z $feat_type ]; then if [ -f $alidir/final.mat ] && [ ! 
-f $transform_dir/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi fi @@ -140,7 +140,7 @@ case $feat_type in valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" ;; - lda) + lda) splice_opts=`cat $alidir/splice_opts 2>/dev/null` # caution: the top-level nnet training script should copy these to its own dir now. cp $alidir/{splice_opts,final.mat} $dir || exit 1; @@ -280,13 +280,13 @@ if [ $stage -le 3 ]; then egs_list="$egs_list ark:$dir/egs_orig.$n.JOB.ark" done echo "$0: Generating training examples on disk" - # The examples will go round-robin to egs_list. + # The examples will go round-robin to egs_list. if [ ! -z $postdir ]; then $cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \ nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \ scp:$postdir/post.JOB.scp ark:- \| \ nnet-copy-egs ark:- $egs_list || exit 1; - else + else $cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \ nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \ "ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \ @@ -299,7 +299,7 @@ if [ $stage -le 4 ]; then # shuffle the order, writing to the egs.JOB.ark egs_list= - for n in $(seq $nj); do + for n in $(seq $nj); do egs_list="$egs_list $dir/egs_orig.JOB.$n.ark" done diff --git a/egs/sre10/v1/local/dnn/get_lda.sh b/egs/sre08/v1/sid/nnet2/get_lda.sh similarity index 99% rename from egs/sre10/v1/local/dnn/get_lda.sh rename to egs/sre08/v1/sid/nnet2/get_lda.sh index 253222ff271..89594a20f84 100755 --- a/egs/sre10/v1/local/dnn/get_lda.sh +++ b/egs/sre08/v1/sid/nnet2/get_lda.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). # 2015 David Snyder # Apache 2.0. # @@ -108,7 +108,7 @@ N=$[$num_feats/$nj] case $feat_type in raw) feats="ark,s,cs:utils/subset_scp.pl --quiet $N $sdata/JOB/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |" ;; - lda) + lda) splice_opts=`cat $alidir/splice_opts 2>/dev/null` cp $alidir/{splice_opts,final.mat} $dir || exit 1; feats="ark,s,cs:utils/subset_scp.pl --quiet $N $sdata/JOB/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" @@ -144,7 +144,7 @@ fi echo $ivector_dim >$dir/ivector_dim if [ -z "$lda_dim" ]; then - spliced_feats_one="$(echo "$spliced_feats" | sed s:JOB:1:g)" + spliced_feats_one="$(echo "$spliced_feats" | sed s:JOB:1:g)" lda_dim=$(feat-to-dim "$spliced_feats_one" -) || exit 1; fi diff --git a/egs/sre10/v1/local/dnn/train_multisplice_accel2.sh b/egs/sre08/v1/sid/nnet2/train_multisplice_accel2.sh similarity index 96% rename from egs/sre10/v1/local/dnn/train_multisplice_accel2.sh rename to egs/sre08/v1/sid/nnet2/train_multisplice_accel2.sh index f5441d6e967..c56e89b5d94 100755 --- a/egs/sre10/v1/local/dnn/train_multisplice_accel2.sh +++ b/egs/sre08/v1/sid/nnet2/train_multisplice_accel2.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). +# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). # 2013 Xiaohui Zhang # 2013 Guoguo Chen # 2014 Vimal Manohar @@ -8,7 +8,7 @@ # Apache 2.0. 
# This is a modified version of train_multisplice_accel2.sh in -# steps/nnet2/ for speaker recognition. The main difference is +# ../../steps/nnet2/ for speaker recognition. The main difference is # that it uses different get_lda.sh and get_egs2.sh scripts. # # The original train_multisplice_accel2.sh was a modified version of @@ -25,11 +25,11 @@ num_epochs=15 # Number of epochs of training; initial_effective_lrate=0.01 final_effective_lrate=0.001 bias_stddev=0.5 -pnorm_input_dim=3000 +pnorm_input_dim=3000 pnorm_output_dim=300 minibatch_size=128 # by default use a smallish minibatch size for neural net # training; this controls instability which would otherwise - # be a problem with multi-threaded update. + # be a problem with multi-threaded update. samples_per_iter=400000 # each iteration of training, see this many samples # per job. This option is passed to get_egs.sh @@ -66,7 +66,7 @@ splice_indexes="layer0/-4:-3:-2:-1:0:1:2:3:4 layer2/-5:-1:3" # so hidden layer indexing is different from component count -io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't +io_opts="--max-jobs-run 5" # for jobs with a lot of I/O, limits the number running at one time. These don't randprune=4.0 # speeds up LDA. alpha=4.0 # relates to preconditioning. update_period=4 # relates to online preconditioning: says how often we update the subspace. @@ -78,11 +78,11 @@ precondition_rank_out=80 # relates to online preconditioning mix_up=0 # Number of components to mix up to (should be > #tree leaves, if # specified.) num_threads=16 -parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" +parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know. # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads. combine_num_threads=8 -combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage. +combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage. cleanup=true egs_dir= lda_opts= @@ -92,7 +92,7 @@ transform_dir= # If supplied, overrides alidir feat_type= # Can be used to force "raw" features. align_cmd= # The cmd that is passed to steps/nnet2/align.sh align_use_gpu= # Passed to use_gpu in steps/nnet2/align.sh [yes/no] -realign_times= # List of times on which we realign. Each time is +realign_times= # List of times on which we realign. Each time is # floating point number strictly between 0 and 1, which # will be multiplied by the num-iters to get an iteration # number. @@ -127,10 +127,10 @@ if [ $# != 4 ]; then echo " --num-threads # Number of parallel threads per job (will affect results" echo " # as well as speed; may interact with batch size; if you increase" echo " # this, you may want to decrease the batch size." - echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" - echo " # use multiple threads... note, you might have to reduce mem_free,ram_free" - echo " # versus your defaults, because it gets multiplied by the -pe smp argument." - echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." + echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" + echo " # use multiple threads... note, you might have to reduce --mem" + echo " # versus your defaults, because it gets multiplied by the --num-threads argument." + echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." 
echo " --minibatch-size # Size of minibatch to process (note: product with --num-threads" echo " # should not get too large, e.g. >2k)." echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per" @@ -148,7 +148,7 @@ if [ $# != 4 ]; then echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." - + exit 1; fi @@ -201,7 +201,7 @@ extra_opts+=(--transform-dir $transform_dir) if [ $stage -le -4 ]; then echo "$0: calling get_lda.sh" - local/dnn/get_lda.sh $lda_opts "${extra_opts[@]}" --left-context $first_left_context --right-context $first_right_context --cmd "$cmd" $data $lang $alidir $dir || exit 1; + sid/nnet2/get_lda.sh $lda_opts "${extra_opts[@]}" --left-context $first_left_context --right-context $first_right_context --cmd "$cmd" $data $lang $alidir $dir || exit 1; fi # these files will have been written by get_lda.sh feat_dim=$(cat $dir/feat_dim) || exit 1; @@ -213,7 +213,7 @@ if [ $stage -le -3 ] && [ -z "$egs_dir" ]; then extra_opts+=(--left-context $nnet_left_context ) extra_opts+=(--right-context $nnet_right_context ) echo "$0: calling get_egs2.sh" - local/dnn/get_egs2.sh $egs_opts "${extra_opts[@]}" \ + sid/nnet2/get_egs2.sh $egs_opts "${extra_opts[@]}" \ --samples-per-iter $samples_per_iter --stage $get_egs_stage \ --io-opts "$io_opts" \ --cmd "$cmd" $egs_opts \ @@ -372,7 +372,7 @@ while [ $x -lt $num_iters ]; do ilr=$initial_effective_lrate; flr=$final_effective_lrate; np=$num_archives_processed; nt=$num_archives_to_process; this_learning_rate=$(perl -e "print (($x + 1 >= $num_iters ? $flr : $ilr*exp($np*log($flr/$ilr)/$nt))*$this_num_jobs);"); - echo "On iteration $x, learning rate is $this_learning_rate." + echo "On iteration $x, learning rate is $this_learning_rate." if [ ! -z "${realign_this_iter[$x]}" ]; then prev_egs_dir=$cur_egs_dir @@ -417,7 +417,7 @@ while [ $x -lt $num_iters ]; do steps/nnet2/remove_egs.sh $prev_egs_dir fi fi - + # Set off jobs doing some diagnostics, in the background. # Use the egs dir from the previous iteration for the diagnostics $cmd $dir/log/compute_prob_valid.$x.log \ @@ -461,7 +461,7 @@ while [ $x -lt $num_iters ]; do ( # this sub-shell is so that when we "wait" below, # we only wait for the training jobs that we just spawned, # not the diagnostic jobs that we spawned above. - + # We can't easily use a single parallel SGE job to do the main training, # because the computation of which archive and which --frame option # to use for each job is a little complex, so we spawn each one separately. @@ -500,7 +500,7 @@ while [ $x -lt $num_iters ]; do n=$(perl -e '($nj,$pat)=@ARGV; $best_n=1; $best_logprob=-1.0e+10; for ($n=1;$n<=$nj;$n++) { $fn = sprintf($pat,$n); open(F, "<$fn") || die "Error opening log file $fn"; undef $logprob; while () { if (m/log-prob-per-frame=(\S+)/) { $logprob=$1; } } - close(F); if (defined $logprob && $logprob > $best_logprob) { $best_logprob=$logprob; + close(F); if (defined $logprob && $logprob > $best_logprob) { $best_logprob=$logprob; $best_n=$n; } } print "$best_n\n"; ' $num_jobs_nnet $dir/log/train.$x.%d.log) || exit 1; [ -z "$n" ] && echo "Error getting best model" && exit 1; cp $dir/$[$x+1].$n.mdl $dir/$[$x+1].mdl || exit 1; @@ -537,7 +537,7 @@ if [ $stage -le $num_iters ]; then cur_offset=0 # current offset from first_model_combine. 
for n in $(seq $max_models_combine); do next_offset=$[($n*$num_models_combine)/$max_models_combine] - sub_list="" + sub_list="" for o in $(seq $cur_offset $[$next_offset-1]); do iter=$[$first_model_combine+$o] mdl=$dir/$iter.mdl diff --git a/egs/sre08/v1/sid/train_diag_ubm.sh b/egs/sre08/v1/sid/train_diag_ubm.sh index 6ff1a9099d9..1e79fc10c99 100755 --- a/egs/sre08/v1/sid/train_diag_ubm.sh +++ b/egs/sre08/v1/sid/train_diag_ubm.sh @@ -60,7 +60,7 @@ if [ $# != 3 ]; then echo " # in initialization phase (then split)" echo " --num-threads # number of threads to use in initialization" echo " # phase (must match with parallel-opts option)" - echo " --parallel-opts # Option should match number of threads in" + echo " --parallel-opts # Option should match number of threads in" echo " # --num-threads option above" echo " --min-gaussian-weight # min Gaussian weight allowed in GMM" echo " # initialization (this relatively high" @@ -85,7 +85,7 @@ for f in $data/feats.scp $data/vad.scp; do [ ! -f $f ] && echo "$0: expecting file $f to exist" && exit 1 done -parallel_opts="-pe smp $num_threads" +parallel_opts="--num-threads $num_threads" delta_opts="--delta-window=$delta_window --delta-order=$delta_order" echo $delta_opts > $dir/delta_opts @@ -135,10 +135,11 @@ for x in `seq 0 $[$num_iters-1]`; do $cmd $dir/log/update.$x.log \ gmm-global-est $opt --min-gaussian-weight=$min_gaussian_weight $dir/$x.dubm "gmm-global-sum-accs - $dir/$x.*.acc|" \ $dir/$[$x+1].dubm || exit 1; - rm $dir/$x.*.acc $dir/$x.dubm + $cleanup && rm $dir/$x.*.acc $dir/$x.dubm fi done -rm $dir/gselect.*.gz +$cleanup && rm $dir/gselect.*.gz + mv $dir/$num_iters.dubm $dir/final.dubm || exit 1; exit 0; diff --git a/egs/sre08/v1/sid/train_ivector_extractor.sh b/egs/sre08/v1/sid/train_ivector_extractor.sh index 5d7eb984485..68ba0ca65fd 100755 --- a/egs/sre08/v1/sid/train_ivector_extractor.sh +++ b/egs/sre08/v1/sid/train_ivector_extractor.sh @@ -13,7 +13,7 @@ # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -24,8 +24,8 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we +nj=10 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we # run is nj * num_processes * num_threads, and the number of # separate pieces of data is nj * num_processes. num_threads=4 @@ -90,7 +90,7 @@ if [ -f $srcdir/delta_opts ]; then cp $srcdir/delta_opts $dir/ 2>/dev/null fi -parallel_opts="-pe smp $[$num_threads*$num_processes]" +parallel_opts="--num-threads $[$num_threads*$num_processes]" ## Set up features. 
feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -102,7 +102,7 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1 -fi +fi # Do Gaussian selection and posterior extracion @@ -140,26 +140,24 @@ while [ $x -lt $num_iters ]; do done wait [ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1; - accs="" - for j in $(seq $nj); do - accs+="$dir/acc.$x.$j " - done - echo "Summing accs (pass $x)" - $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ - ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; + accs="" + for j in $(seq $nj); do + accs+="$dir/acc.$x.$j " + done + echo "Summing accs (pass $x)" + $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ + ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; echo "Updating model (pass $x)" nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. - $cmd -pe smp $nt $dir/log/update.$x.log \ - ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; - rm $dir/acc.$x.* - if $cleanup; then - rm $dir/acc.$x - # rm $dir/$x.ie - fi + $cmd $parallel_opts $dir/log/update.$x.log \ + ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; + rm $dir/acc.$x.* + $cleanup && rm $dir/acc.$x $dir/$x.ie fi x=$[$x+1] done - +$cleanup && rm -f $dir/post.*.gz +rm -f $dir/final.ie ln -s $x.ie $dir/final.ie diff --git a/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh b/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh index 64579735376..2ce915a0750 100755 --- a/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh +++ b/egs/sre08/v1/sid/train_ivector_extractor_dnn.sh @@ -1,23 +1,23 @@ #!/bin/bash # Copyright 2013 Daniel Povey -# 2014-2015 David Snyder +# 2014-2017 David Snyder # 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) # 2015 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0. # This script trains the i-vector extractor using a DNN-based UBM. It also requires # an fGMM, usually created by the script sid/init_full_gmm_from_dnn.sh. -# Note: there are 3 separate levels of parallelization: num_threads, num_processes, -# and num_jobs. This may seem a bit excessive. It has to do with minimizing -# memory usage and disk I/O, subject to various constraints. The "num_threads" +# Note: there are 3 separate levels of parallelization: num_threads, num_processes, +# and num_jobs. This may seem a bit excessive. It has to do with minimizing +# memory usage and disk I/O, subject to various constraints. The "num_threads" # is how many threads a program uses; the "num_processes" is the number of separate # processes a single job spawns, and then sums the accumulators in memory. # Our recommendation: # - Set num_threads to the minimum of (4, or how many virtual cores your machine has). # (because of needing to lock various global quantities, the program can't # use many more than 4 threads with good CPU utilization). -# - Set num_processes to the number of virtual cores on each machine you have, divided by +# - Set num_processes to the number of virtual cores on each machine you have, divided by # num_threads. E.g. 4, if you have 16 virtual cores. 
If you're on a shared queue # that's busy with other people's jobs, it may be wise to set it to rather less # than this maximum though, or your jobs won't get scheduled. And if memory is @@ -28,12 +28,12 @@ # may want more jobs, though. # Begin configuration section. -nj=10 # this is the number of separate queue jobs we run, but each one - # contains num_processes sub-jobs.. the real number of threads we - # run is nj * num_processes * num_threads, and the number of - # separate pieces of data is nj * num_processes. +nj=5 # this is the number of separate queue jobs we run, but each one + # contains num_processes sub-jobs.. the real number of threads we + # run is nj * num_processes * num_threads, and the number of + # separate pieces of data is nj * num_processes. num_threads=4 -num_processes=4 # each job runs this many processes, each with --num-threads threads +num_processes=2 # each job runs this many processes, each with --num-threads threads cmd="run.pl" stage=-4 num_gselect=20 # Gaussian-selection using diagonal model: number of Gaussians to select @@ -46,6 +46,9 @@ cleanup=true posterior_scale=1.0 # This scale helps to control for successve features being highly # correlated. E.g. try 0.1 or 0.3 sum_accs_opt= +use_gpu=true +chunk_size=256 +nnet_job_opt= # End configuration section. echo "$0 $@" # Print the command line for logging @@ -71,6 +74,9 @@ if [ $# != 5 ]; then echo " # diagonal model." echo " --sum-accs-opt # Option e.g. '-l hostname=a15' to localize" echo " # sum-accs process to nfs server." + echo " --nnet-job-opt # Options for the DNN jobs which add to or" + echo " # replace those specified by --cmd" + echo " --chunk-size # Number of frames processed at a time by the DNN" exit 1; fi @@ -80,6 +86,21 @@ data=$3 data_dnn=$4 dir=$5 +gpu_opt="" +if $use_gpu; then + nnet_job_opt="$nnet_job_opt --gpu 1" + gpu_opt="--use-gpu=yes" + if ! cuda-compiled; then + echo "$0: WARNING: you are trying to use the GPU but you have not compiled" + echo " for CUDA. If you have GPUs and have nvcc installed, go to src/" + echo " and do ./configure; make" + exit 1 + fi +else + echo "$0: without using a GPU this will be slow." + gpu_opt="--use-gpu=no" +fi + srcdir=$(dirname $fgmm_model) for f in $fgmm_model $data/feats.scp ; do @@ -100,9 +121,7 @@ if [ -f $srcdir/delta_opts ]; then cp $srcdir/delta_opts $dir/ 2>/dev/null fi -splice_opts=`cat exp/nnet//splice_opts 2>/dev/null` # frame-splicing options - -parallel_opts="-pe smp $[$num_threads*$num_processes]" +parallel_opts="--num-threads $[$num_threads*$num_processes]" ## Set up features. 
feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |" @@ -117,19 +136,24 @@ if [ $stage -le -2 ]; then $cmd $dir/log/init.log \ ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \ $dir/final.ubm $dir/0.ie || exit 1; -fi +fi # Do Gaussian selection and posterior extraction if [ $stage -le -1 ]; then echo $nj_full > $dir/num_jobs echo "$0: doing DNN posterior computation" - $cmd JOB=1:$nj_full $dir/log/post.JOB.log \ - nnet-am-compute --apply-log=true $nnet "$nnet_feats" ark:- \ - \| select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- \ - \| logprob-to-post --min-post=$min_post ark,s,cs:- ark:- \| \ - scale-post ark:- $posterior_scale "ark:|gzip -c >$dir/post.JOB.gz" || exit 1; - + for g in $(seq $nj_full); do + $cmd $nnet_job_opt $dir/log/post.$g.log \ + nnet-am-compute $gpu_opt \ + --chunk-size=${chunk_size} --apply-log=true $nnet \ + "`echo $nnet_feats | sed s/JOB/$g/g`" \ + ark:- \ + \| select-voiced-frames ark:- scp,s,cs:$sdata/$g/vad.scp ark:- \ + \| logprob-to-post ark:- ark:- \ + \| scale-post ark:- $posterior_scale "ark:|gzip -c >$dir/post.$g.gz" || exit 1 & + done + wait else if ! [ $nj_full -eq $(cat $dir/num_jobs) ]; then echo "Num-jobs mismatch $nj_full versus $(cat $dir/num_jobs)" @@ -156,26 +180,25 @@ while [ $x -lt $num_iters ]; do done wait [ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1; - accs="" - for j in $(seq $nj); do - accs+="$dir/acc.$x.$j " - done - echo "Summing accs (pass $x)" - $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ - ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; + accs="" + for j in $(seq $nj); do + accs+="$dir/acc.$x.$j " + done + echo "Summing accs (pass $x)" + $cmd $sum_accs_opt $dir/log/sum_acc.$x.log \ + ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1; echo "Updating model (pass $x)" nt=$[$num_threads*$num_processes] # use the same number of threads that # each accumulation process uses, since we # can be sure the queue will support this many. - $cmd -pe smp $nt $dir/log/update.$x.log \ - ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; - rm $dir/acc.$x.* - if $cleanup; then - rm $dir/acc.$x - # rm $dir/$x.ie - fi + $cmd $parallel_opts $dir/log/update.$x.log \ + ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1; + rm $dir/acc.$x.* + $cleanup && rm $dir/acc.$x $dir/$x.ie fi x=$[$x+1] done +$cleanup && rm -f $dir/post.*.gz +rm -f $dir/final.ie ln -s $x.ie $dir/final.ie diff --git a/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh b/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh index 684cc8ddfc0..97b9789af0c 100755 --- a/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh +++ b/egs/sre10/v1/local/dnn/run_nnet2_multisplice.sh @@ -4,56 +4,52 @@ # egs/fisher_english/s5/local/online. It has been modified # for speaker recognition. -. cmd.sh - - stage=1 train_stage=-10 use_gpu=true set -e -. cmd.sh +. ./cmd.sh . ./path.sh -. ./utils/parse_options.sh +. utils/parse_options.sh # assume use_gpu=true since it would be way too slow otherwise. if ! cuda-compiled; then - cat < local/scores_gmm_2048_dep_pooled/plda_scores +# Pool the gender dependent results. 
+mkdir -p exp/scores_gmm_2048_dep_pooled +cat exp/scores_gmm_2048_dep_male/plda_scores exp/scores_gmm_2048_dep_female/plda_scores \ + > exp/scores_gmm_2048_dep_pooled/plda_scores # GMM-2048 PLDA EER # ind pooled: 2.26 @@ -140,7 +141,7 @@ cat local/scores_gmm_2048_dep_male/plda_scores local/scores_gmm_2048_dep_female/ echo "GMM-$num_components EER" for x in ind dep; do for y in female male pooled; do - eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_gmm_${num_components}_${x}_${y}/plda_scores) 2> /dev/null` + eer=`compute-eer <(python local/prepare_for_eer.py $trials exp/scores_gmm_${num_components}_${x}_${y}/plda_scores) 2> /dev/null` echo "${x} ${y}: $eer" done done diff --git a/egs/sre10/v2/cmd.sh b/egs/sre10/v2/cmd.sh index 5c38b3a5d77..fe4cd0bcb3f 100755 --- a/egs/sre10/v2/cmd.sh +++ b/egs/sre10/v2/cmd.sh @@ -6,10 +6,10 @@ # the number of cpus on your machine. #a) JHU cluster options -export train_cmd="queue.pl -l arch=*64*" -export decode_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G" -#export cuda_cmd="..." -export mkgraph_cmd="queue.pl -l arch=*64* ram_free=4G,mem_free=4G" +export train_cmd="queue.pl" +export decode_cmd="queue.pl --mem 4G" +#export cuda_cmd="queue --gpu 1" +export mkgraph_cmd="queue.pl --mem 4G" #b) BUT cluster options #export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M" diff --git a/egs/sre10/v2/run.sh b/egs/sre10/v2/run.sh index 4f5ab2756bb..b6c24fc1371 100755 --- a/egs/sre10/v2/run.sh +++ b/egs/sre10/v2/run.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2015-2016 David Snyder +# Copyright 2015-2017 David Snyder # 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) # 2015 Johns Hopkins University (Author: Daniel Povey) # Apache 2.0. @@ -105,62 +105,61 @@ utils/fix_data_dir.sh data/train_32k # Initialize a full GMM from the DNN posteriors and speaker recognition # features. This can be used both alone, as a UBM, or to initialize the # i-vector extractor in a DNN-based system. -sid/init_full_ubm_from_dnn.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" \ +sid/init_full_ubm_from_dnn.sh --cmd "$train_cmd --mem 15G" \ data/train_32k \ data/train_dnn_32k $nnet exp/full_ubm # Train an i-vector extractor based on just the supervised-GMM. sid/train_ivector_extractor.sh \ - --cmd "$train_cmd -l mem_free=70G,ram_free=70G" \ + --cmd "$train_cmd --mem 120G" \ --ivector-dim 600 \ --num-iters 5 exp/full_ubm/final.ubm data/train \ exp/extractor_sup_gmm # Train an i-vector extractor based on the DNN-UBM. sid/train_ivector_extractor_dnn.sh \ - --cmd "$train_cmd -l mem_free=80G,ram_free=80G" \ - --min-post 0.015 \ - --ivector-dim 600 \ - --num-iters 5 exp/full_ubm/final.ubm $nnet \ + --cmd "$train_cmd --mem 100G" --nnet-job-opt "--mem 4G" \ + --min-post 0.015 --ivector-dim 600 --num-iters 5 \ + exp/full_ubm/final.ubm $nnet \ data/train \ data/train_dnn \ exp/extractor_dnn # Extract i-vectors from the extractor with the sup-GMM UBM. 
sid/extract_ivectors.sh \ - --cmd "$train_cmd -l mem_free=8G,ram_free=8G" --nj 40 \ + --cmd "$train_cmd --mem 12G" --nj 40 \ exp/extractor_sup_gmm data/sre10_train \ exp/ivectors_sre10_train_sup_gmm sid/extract_ivectors.sh \ - --cmd "$train_cmd -l mem_free=8G,ram_free=8G" --nj 40 \ + --cmd "$train_cmd --mem 12G" --nj 40 \ exp/extractor_sup_gmm data/sre10_test \ exp/ivectors_sre10_test_sup_gmm sid/extract_ivectors.sh \ - --cmd "$train_cmd -l mem_free=8G,ram_free=8G" --nj 40 \ + --cmd "$train_cmd --mem 12G" --nj 40 \ exp/extractor_sup_gmm data/sre \ exp/ivectors_sre_sup_gmm # Extract i-vectors using the extractor with the DNN-UBM. sid/extract_ivectors_dnn.sh \ - --cmd "$train_cmd -l mem_free=10G,ram_free=10G" --nj 40 \ + --cmd "$train_cmd --mem 15G" --nj 10 \ exp/extractor_dnn \ $nnet \ data/sre10_test \ data/sre10_test_dnn \ exp/ivectors10_test_dnn -sid/extract_ivectors_dnn.sh - --cmd "$train_cmd -l mem_free=10G,ram_free=10G" --nj 40 \ +sid/extract_ivectors_dnn.sh \ + --cmd "$train_cmd --mem 15G" --nj 10 \ exp/extractor_dnn \ $nnet \ data/sre10_train \ data/sre10_train_dnn \ exp/ivectors10_train_dnn -sid/extract_ivectors_dnn.sh - --cmd "$train_cmd -l mem_free=10G,ram_free=10G" --nj 40 \ +sid/extract_ivectors_dnn.sh \ + --cmd "$train_cmd --mem 15G" --nj 10 \ exp/extractor_dnn \ $nnet \ data/sre \ @@ -183,87 +182,90 @@ local/scoring_common.sh data/sre data/sre10_train data/sre10_test \ # # local/cosine_scoring.sh data/sre10_train data/sre10_test \ # exp/ivectors_sre10_train exp/ivectors_sre10_test $trials \ -# local/scores_gmm_2048_ind_pooled +# exp/scores_gmm_2048_ind_pooled # local/lda_scoring.sh data/sre data/sre10_train data/sre10_test \ # exp/ivectors_sre exp/ivectors_sre10_train exp/ivectors_sre10_test \ -# $trials local/scores_gmm_2048_ind_pooled +# $trials exp/scores_gmm_2048_ind_pooled # Create a gender independent PLDA model and do scoring with the sup-GMM system. local/plda_scoring.sh data/sre data/sre10_train data/sre10_test \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm \ - exp/ivectors_sre10_test_sup_gmm $trials local/scores_sup_gmm_ind_pooled + exp/ivectors_sre10_test_sup_gmm $trials exp/scores_sup_gmm_ind_pooled local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_female data/sre10_test_female \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_female \ - exp/ivectors_sre10_test_sup_gmm_female $trials_female local/scores_sup_gmm_ind_female + exp/ivectors_sre10_test_sup_gmm_female $trials_female exp/scores_sup_gmm_ind_female local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_male data/sre10_test_male \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_male \ - exp/ivectors_sre10_test_sup_gmm_male $trials_male local/scores_sup_gmm_ind_male + exp/ivectors_sre10_test_sup_gmm_male $trials_male exp/scores_sup_gmm_ind_male # Create gender dependent PLDA models and do scoring with the sup-GMM system. 
local/plda_scoring.sh data/sre_female data/sre10_train_female data/sre10_test_female \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_female \ - exp/ivectors_sre10_test_sup_gmm_female $trials_female local/scores_sup_gmm_dep_female + exp/ivectors_sre10_test_sup_gmm_female $trials_female exp/scores_sup_gmm_dep_female local/plda_scoring.sh data/sre_male data/sre10_train_male data/sre10_test_male \ exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_male \ - exp/ivectors_sre10_test_sup_gmm_male $trials_male local/scores_sup_gmm_dep_male -mkdir -p local/scores_sup_gmm_dep_pooled -cat local/scores_sup_gmm_dep_male/plda_scores local/scores_sup_gmm_dep_female/plda_scores \ - > local/scores_sup_gmm_dep_pooled/plda_scores + exp/ivectors_sre10_test_sup_gmm_male $trials_male exp/scores_sup_gmm_dep_male + +# Pool the gender dependent results +mkdir -p exp/scores_sup_gmm_dep_pooled +cat exp/scores_sup_gmm_dep_male/plda_scores exp/scores_sup_gmm_dep_female/plda_scores \ + > exp/scores_sup_gmm_dep_pooled/plda_scores # Create a gender independent PLDA model and do scoring with the DNN system. local/plda_scoring.sh data/sre data/sre10_train data/sre10_test \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn \ - exp/ivectors_sre10_test_dnn $trials local/scores_dnn_ind_pooled + exp/ivectors_sre10_test_dnn $trials exp/scores_dnn_ind_pooled local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_female data/sre10_test_female \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_female \ - exp/ivectors_sre10_test_dnn_female $trials_female local/scores_dnn_ind_female + exp/ivectors_sre10_test_dnn_female $trials_female exp/scores_dnn_ind_female local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_male data/sre10_test_male \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_male \ - exp/ivectors_sre10_test_dnn_male $trials_male local/scores_dnn_ind_male + exp/ivectors_sre10_test_dnn_male $trials_male exp/scores_dnn_ind_male # Create gender dependent PLDA models and do scoring with the DNN system. 
local/plda_scoring.sh data/sre_female data/sre10_train_female data/sre10_test_female \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_female \ - exp/ivectors_sre10_test_dnn_female $trials_female local/scores_dnn_dep_female + exp/ivectors_sre10_test_dnn_female $trials_female exp/scores_dnn_dep_female local/plda_scoring.sh data/sre_male data/sre10_train_male data/sre10_test_male \ exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_male \ - exp/ivectors_sre10_test_dnn_male $trials_male local/scores_dnn_dep_male -mkdir -p local/scores_dnn_dep_pooled -cat local/scores_dnn_dep_male/plda_scores local/scores_dnn_dep_female/plda_scores \ - > local/scores_dnn_dep_pooled/plda_scores + exp/ivectors_sre10_test_dnn_male $trials_male exp/scores_dnn_dep_male + +mkdir -p exp/scores_dnn_dep_pooled +cat exp/scores_dnn_dep_male/plda_scores exp/scores_dnn_dep_female/plda_scores \ + > exp/scores_dnn_dep_pooled/plda_scores # Sup-GMM PLDA EER # ind pooled: 1.72 # ind female: 1.81 -# ind male: 1.56 -# dep female: 1.89 -# dep male: 1.39 -# dep pooled: 1.65 -echo "Sup-GMM-$num_components EER" +# ind male: 1.70 +# dep female: 2.03 +# dep male: 1.50 +# dep pooled: 1.79 +echo "Sup-GMM EER" for x in ind dep; do for y in female male pooled; do - eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_sup_gmm_${x}_${y}/plda_scores) 2> /dev/null` + eer=`compute-eer <(python local/prepare_for_eer.py $trials exp/scores_sup_gmm_${x}_${y}/plda_scores) 2> /dev/null` echo "${x} ${y}: $eer" done done -# DNN PLDA EER -# ind pooled: 1.05 -# ind female: 1.33 -# ind male: 0.75 -# dep female: 1.41 -# dep male: 0.64 -# dep pooled: 1.02 -echo "DNN-$num_components EER" +# DNN-UBM EER +# ind pooled: 1.01 +# ind female: 1.16 +# ind male: 0.78 +# dep female: 1.27 +# dep male: 0.61 +# dep pooled: 0.96 +echo "DNN-UBM EER" for x in ind dep; do for y in female male pooled; do - eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_dnn_${x}_${y}/plda_scores) 2> /dev/null` + eer=`compute-eer <(python local/prepare_for_eer.py $trials exp/scores_dnn_${x}_${y}/plda_scores) 2> /dev/null` echo "${x} ${y}: $eer" done done # In comparison, here is the EER for an unsupervised GMM-based system -# with 5297 components (the same as the number of senones in the DNN): +# with 5297 components (about the same as the number of senones in the DNN): # GMM-5297 PLDA EER # ind pooled: 2.25 # ind female: 2.33 diff --git a/egs/swahili/s5/cmd.sh b/egs/swahili/s5/cmd.sh index ab1c23f76ef..8c9422b92bc 100755 --- a/egs/swahili/s5/cmd.sh +++ b/egs/swahili/s5/cmd.sh @@ -1,5 +1,5 @@ # JHU cluster options -export train_cmd="queue.pl -l arch=*64*" -export decode_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G" -export cuda_cmd="..." -export mkgraph_cmd="queue.pl -l arch=*64* ram_free=4G,mem_free=4G" +export train_cmd="queue.pl" +export decode_cmd="queue.pl --mem 4G" +export cuda_cmd="queue --gpu 1" +export mkgraph_cmd="queue.pl --mem 4G" diff --git a/egs/swbd/README.txt b/egs/swbd/README.txt index fc61a4c3060..1da570274e4 100644 --- a/egs/swbd/README.txt +++ b/egs/swbd/README.txt @@ -10,11 +10,14 @@ About the Switchboard corpus We are using the eval2000 a.k.a. hub5'00 evaluation data. The acoustics are LDC2002S09 and the text is LDC2002T43. + We are also using the RT'03 test set, available as LDC2007S10. Note: not + all parts of the recipe test with this. + About the Fisher corpus for language modeling We use Fisher English training speech transcripts for language modeling, if they are available. 
The catalog number for part 1 transcripts is LDC2004T19, - and LDC2005T19 for part 2. + and LDC2005T19 for part 2. Each subdirectory of this directory contains the scripts for a sequence of experiments. @@ -24,4 +27,3 @@ scripts for a sequence of experiments. s5b: This is (somewhat less) out of date, please see s5c s5c: This is the current recipe. - diff --git a/egs/swbd/s5b/local/nnet2/run_5a_gpu.sh b/egs/swbd/s5b/local/nnet2/run_5a_gpu.sh index 940c99538cb..3aae7918964 100755 --- a/egs/swbd/s5b/local/nnet2/run_5a_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_5a_gpu.sh @@ -18,7 +18,7 @@ EOF . utils/parse_options.sh -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. ( if [ ! -f exp/nnet5a_gpu/final.mdl ]; then diff --git a/egs/swbd/s5b/local/nnet2/run_5b_gpu.sh b/egs/swbd/s5b/local/nnet2/run_5b_gpu.sh index 50f79208897..74058d9fac4 100755 --- a/egs/swbd/s5b/local/nnet2/run_5b_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_5b_gpu.sh @@ -66,7 +66,7 @@ if [ $stage -le 2 ]; then steps/nnet2/train_block.sh --stage "$train_stage" \ --num-threads 1 --max-change 40.0 --minibatch-size 512 \ - --parallel-opts "-l gpu=1" \ + --parallel-opts "--gpu 1" \ --initial-learning-rate 0.01 --final-learning-rate 0.001 \ --num-epochs 10 --num-epochs-extra 5 \ --cmd "$decode_cmd" \ diff --git a/egs/swbd/s5b/local/nnet2/run_5c_gpu.sh b/egs/swbd/s5b/local/nnet2/run_5c_gpu.sh index 36f72b77083..55becfbe0fc 100755 --- a/egs/swbd/s5b/local/nnet2/run_5c_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_5c_gpu.sh @@ -20,7 +20,7 @@ EOF ( if [ ! -f exp/nnet5c_gpu/final.mdl ]; then - steps/nnet2/train_tanh.sh --cmd "$decode_cmd" --parallel-opts "-l gpu=1" --io-opts "--max-jobs-run 5" \ + steps/nnet2/train_tanh.sh --cmd "$decode_cmd" --parallel-opts "--gpu 1" --io-opts "--max-jobs-run 5" \ --num-threads 1 --minibatch-size 512 --max-change 40.0 --mix-up 20000 --samples-per-iter 300000 \ --num-epochs 10 --num-epochs-extra 3 --initial-learning-rate 0.0067 --final-learning-rate 0.00067 \ --num-jobs-nnet 10 --num-hidden-layers 5 --hidden-layer-dim 1536 data/train_nodup data/lang \ diff --git a/egs/swbd/s5b/local/nnet2/run_5d_gpu.sh b/egs/swbd/s5b/local/nnet2/run_5d_gpu.sh index 5364f14bcb6..e0b523910df 100755 --- a/egs/swbd/s5b/local/nnet2/run_5d_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_5d_gpu.sh @@ -18,7 +18,7 @@ EOF . utils/parse_options.sh -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. ( if [ ! -f exp/$dir/final.mdl ]; then diff --git a/egs/swbd/s5b/local/nnet2/run_5e_gpu.sh b/egs/swbd/s5b/local/nnet2/run_5e_gpu.sh index 545c80c0e1c..77de59b90ff 100755 --- a/egs/swbd/s5b/local/nnet2/run_5e_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_5e_gpu.sh @@ -18,7 +18,7 @@ train_stage=-10 . utils/parse_options.sh -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. ( if [ ! -f exp/$dir/final.mdl ]; then diff --git a/egs/swbd/s5b/local/nnet2/run_5f_gpu.sh b/egs/swbd/s5b/local/nnet2/run_5f_gpu.sh index 3cc315a9775..b91599a27e6 100755 --- a/egs/swbd/s5b/local/nnet2/run_5f_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_5f_gpu.sh @@ -9,7 +9,7 @@ dir=nnet5f_gpu . ./cmd.sh . ./path.sh . 
utils/parse_options.sh -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. ( if [ ! -f exp/$dir/final.mdl ]; then diff --git a/egs/swbd/s5b/local/nnet2/run_6a_gpu.sh b/egs/swbd/s5b/local/nnet2/run_6a_gpu.sh index 712c8e79c5b..6327ee85224 100755 --- a/egs/swbd/s5b/local/nnet2/run_6a_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_6a_gpu.sh @@ -21,7 +21,7 @@ EOF . utils/parse_options.sh -parallel_opts="-l gpu=1" # This is suitable for the CLSP network, you'll likely have to change it. +parallel_opts="--gpu 1" # This is suitable for the CLSP network, you'll likely have to change it. alidir=exp/nnet5a_ali_100k_nodup if [ ! -f $alidir/.done ]; then diff --git a/egs/swbd/s5b/local/nnet2/run_6c_gpu.sh b/egs/swbd/s5b/local/nnet2/run_6c_gpu.sh index 8324051279b..0296f4cca00 100755 --- a/egs/swbd/s5b/local/nnet2/run_6c_gpu.sh +++ b/egs/swbd/s5b/local/nnet2/run_6c_gpu.sh @@ -7,7 +7,7 @@ # directory name. -gpu_opts="-l gpu=1" # This is suitable for the CLSP network, +gpu_opts="--gpu 1" # This is suitable for the CLSP network, # you'll likely have to change it. we'll # use it later on, in the training (it's # not used in denlat creation) @@ -18,8 +18,8 @@ set -e # exit on error. . ./cmd.sh . ./path.sh -! cuda-compiled && cat </dev/null || true data_dirs= - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do steps/shift_feats.sh --cmd "$train_cmd --max-jobs-run 40" --nj 350 \ $x $train_data_dir exp/shift_hires/ mfcc_hires utils/fix_data_dir.sh ${train_data_dir}_fs$x @@ -110,7 +109,7 @@ if [ $frame_subsampling_factor -ne 1 ]; then awk -v nfs=$x '{print "fs"nfs"-"$0}' $online_ivector_dir/ivector_online.scp >> ${online_ivector_dir}_fs/ivector_online.scp done utils/combine_data.sh ${train_data_dir}_fs $data_dirs - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do rm -r ${train_data_dir}_fs$x done fi @@ -119,9 +118,9 @@ if [ $frame_subsampling_factor -ne 1 ]; then affix=_fs fi - + rm ${online_ivector_dir}_fs/ivector_online.scp 2>/dev/null || true -for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do +for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do awk -v nfs=$x '{print "fs"nfs"-"$0}' $online_ivector_dir/ivector_online.scp >> ${online_ivector_dir}_fs/ivector_online.scp done online_ivector_dir=${online_ivector_dir}_fs @@ -140,7 +139,7 @@ fi if [ -z "$lats_dir" ]; then lats_dir=${srcdir}_denlats${affix} if [ $stage -le 2 ]; then - nj=50 + nj=50 # this doesn't really affect anything strongly, except the num-jobs for one of # the phases of get_egs_discriminative.sh below. 
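# (A rough sketch of what the frame-shifting loops above do, assuming the usual
#  frame_subsampling_factor=3 of 'chain' models: $[frame_subsampling_factor/2]
#  evaluates to 1, so each loop expands to
#      for x in $(seq -1 1); do ... done
#  i.e. the shifts -1, 0 and 1.  Each pass writes a copy of the features shifted
#  by x frames to ${train_data_dir}_fs$x, and the three copies are then merged
#  with utils/combine_data.sh before the temporary per-shift copies are removed.)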
num_threads_denlats=6 @@ -154,16 +153,13 @@ if [ -z "$lats_dir" ]; then fi fi -model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` -model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` +model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` +model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` left_context=$[model_left_context + extra_left_context] right_context=$[model_right_context + extra_right_context] -valid_left_context=$[valid_left_context + frames_per_eg] -valid_right_context=$[valid_right_context + frames_per_eg] - -cmvn_opts=`cat $srcdir/cmvn_opts` +cmvn_opts=`cat $srcdir/cmvn_opts` if [ -z "$degs_dir" ]; then degs_dir=${srcdir}_degs${affix} @@ -176,16 +172,13 @@ if [ -z "$degs_dir" ]; then # have a higher maximum num-jobs if if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi - degs_opts="--determinize true --minimize true --remove-output-symbols true --remove-epsilons true --collapse-transition-ids true" - steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ --adjust-priors false --acwt 1.0 \ --online-ivector-dir $online_ivector_dir \ --left-context $left_context --right-context $right_context \ - --valid-left-context $valid_left_context --valid-right-context $valid_right_context \ - --priors-left-context $valid_left_context --priors-right-context $valid_right_context $frame_subsampling_opt \ - --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg ${degs_opts} \ + $frame_subsampling_opt \ + --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \ $train_data_dir $lang ${srcdir}_ali${affix} $lats_dir $srcdir/final.mdl $degs_dir ; fi fi @@ -198,7 +191,7 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" --use-frame-shift false \ - --truncate-deriv-weights $truncate_deriv_weights --adjust-priors false \ + --adjust-priors false \ --modify-learning-rates false \ ${degs_dir} $dir ; fi @@ -210,7 +203,7 @@ if [ $stage -le 5 ]; then ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` iter=epoch$x.adj - + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ --acwt 1.0 --post-decode-acwt 10.0 \ --online-ivector-dir exp/nnet3/ivectors_${decode_set} $context_opts \ @@ -235,4 +228,3 @@ fi exit 0; - diff --git a/egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh b/egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh new file mode 100755 index 00000000000..1e673f8e01a --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_blstm_6k.sh @@ -0,0 +1,240 @@ +#!/bin/bash + +# 6k is same as 6j, but with the fast lstm layers + +# local/chain/compare_wer_general.sh blstm_6j_sp blstm_6k_sp +# System blstm_6j_sp blstm_6k_sp +# WER on train_dev(tg) 13.80 13.25 +# WER on train_dev(fg) 12.64 12.27 +# WER on eval2000(tg) 15.6 15.7 +# WER on eval2000(fg) 14.2 14.5 +# Final train prob -0.055 -0.052 +# Final valid prob -0.077 -0.080 +# Final train prob (xent) -0.777 -0.743 +# Final valid prob (xent) -0.9126 -0.8816 + +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/blstm_6k # Note: _sp will get added to this 
if $speed_perturb == true. +decode_iter= +decode_dir_affix= + +# training options +leftmost_questions_truncate=-1 +chunk_width=150 +chunk_left_context=40 +chunk_right_context=40 +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=0 + +# decode options +extra_left_context=50 +extra_right_context=50 +frames_per_chunk= + +remove_egs=false +common_egs_dir= + +affix= +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=blstm1-forward input=lda cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm1-backward input=lda cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm2-forward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm2-backward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm3-forward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm3-backward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. 
we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1200000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + +decode_suff=sw1_tg +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + iter_opts= + if [ ! 
-z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_${decode_suff} || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh b/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh new file mode 100755 index 00000000000..b9b7152dcbe --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_lstm_6k.sh @@ -0,0 +1,304 @@ +#!/bin/bash + +# Copyright 2015 Johns Hopkins University (Author: Daniel Povey). +# 2015 Vijayaditya Peddinti +# 2015 Xingyu Na +# 2015 Pegah Ghahrmani +# 2017 Google Inc. (vpeddinti@google.com) +# Apache 2.0. + + + +# run_lstm_6k.sh is like run_lstm_6j.sh but making +# various kaldi-5.1-related upgrades to the script. +# For the list of changes compare tuning/run_tdnn_lstm_1{c,d}.sh + +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/lstm_6k # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_nj=50 + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + # Note : The delay variable will be used just in the init.config. 
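  # (Reading the descriptor on the next line, roughly: Append(-2,-1,0,1,2,...)
  #  splices the 40-dim "input" features at offsets -2..+2 around the current
  #  frame t, and ReplaceIndex(ivector, t, 0) requests the 100-dim "ivector"
  #  input with its time index pinned to 0, so the same i-vector value is
  #  appended to every spliced frame of an example rather than varying with t.)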
+ fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat delay=$label_delay + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
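  # (For context: the --self-loop-scale 1.0 passed to mkgraph.sh below, together
  #  with --acwt 1.0 and --post-decode-acwt 10.0 in the decode commands, is the
  #  standard 'chain' configuration; the post-decode scaling by 10 just puts the
  #  lattice scores back on the conventional scale so the usual LM-weight range
  #  applies at scoring time.)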
+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +iter_opts= +if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi + +if [ $stage -le 15 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in looped decoding" + exit 1 + fi +fi + +if $test_online_decoding && [ $stage -le 17 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor $dir ${dir}_online + + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + # note: we just give it "$decode_set" as it only uses the wav.scp, the + # feature type does not matter. 
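      # (The ${decode_iter:+_$decode_iter} used in the decode-directory names is
      #  plain bash parameter expansion: it becomes "_$decode_iter" when
      #  decode_iter is set and non-empty, and nothing otherwise.  For example,
      #  with the hypothetical setting decode_iter=final the directory would be
      #  decode_eval2000_final_sw1_tg; with decode_iter left empty it is
      #  decode_eval2000_sw1_tg.)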
+ + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --acwt 1.0 --post-decode-acwt 10.0 \ + $graph_dir data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in online decoding" + exit 1 + fi +fi + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh index 85afa7bf9ca..25c6841c0a9 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_6h_discriminative.sh @@ -78,14 +78,13 @@ dir=${srcdir}_${criterion} ## Egs options frames_per_eg=150 frames_overlap_per_eg=30 -truncate_deriv_weights=10 ## Nnet training options effective_learning_rate=0.000000125 max_param_change=1 num_jobs_nnet=4 num_epochs=4 -regularization_opts="--xent-regularize=0.1 --l2-regularize=0.00005" # Applicable for providing --xent-regularize and --l2-regularize options +regularization_opts="--xent-regularize=0.1 --l2-regularize=0.00005" # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 ## Decode options @@ -93,8 +92,8 @@ decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we deci if $use_gpu; then if ! cuda-compiled; then - cat </dev/null || true data_dirs= - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do steps/shift_feats.sh --cmd "$train_cmd --max-jobs-run 40" --nj 350 \ $x $train_data_dir exp/shift_hires/ mfcc_hires utils/fix_data_dir.sh ${train_data_dir}_fs$x @@ -137,7 +136,7 @@ if [ $frame_subsampling_factor -ne 1 ]; then awk -v nfs=$x '{print "fs"nfs"-"$0}' $online_ivector_dir/ivector_online.scp >> ${online_ivector_dir}_fs/ivector_online.scp done utils/combine_data.sh ${train_data_dir}_fs $data_dirs - for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do + for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do rm -r ${train_data_dir}_fs$x done fi @@ -146,9 +145,9 @@ if [ $frame_subsampling_factor -ne 1 ]; then affix=_fs fi - + rm ${online_ivector_dir}_fs/ivector_online.scp 2>/dev/null || true -for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do +for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do awk -v nfs=$x '{print "fs"nfs"-"$0}' $online_ivector_dir/ivector_online.scp >> ${online_ivector_dir}_fs/ivector_online.scp done online_ivector_dir=${online_ivector_dir}_fs @@ -167,7 +166,7 @@ fi if [ -z "$lats_dir" ]; then lats_dir=${srcdir}_denlats${affix} if [ $stage -le 2 ]; then - nj=50 + nj=50 # this doesn't really affect anything strongly, except the num-jobs for one of # the phases of get_egs_discriminative.sh below. 
num_threads_denlats=6 @@ -181,16 +180,13 @@ if [ -z "$lats_dir" ]; then fi fi -model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` -model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` +model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'` +model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'` left_context=$[model_left_context + extra_left_context] right_context=$[model_right_context + extra_right_context] -valid_left_context=$[valid_left_context + frames_per_eg] -valid_right_context=$[valid_right_context + frames_per_eg] - -cmvn_opts=`cat $srcdir/cmvn_opts` +cmvn_opts=`cat $srcdir/cmvn_opts` if [ -z "$degs_dir" ]; then degs_dir=${srcdir}_degs${affix} @@ -203,16 +199,13 @@ if [ -z "$degs_dir" ]; then # have a higher maximum num-jobs if if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi - degs_opts="--determinize true --minimize true --remove-output-symbols true --remove-epsilons true --collapse-transition-ids true" - steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ --adjust-priors false --acwt 1.0 \ --online-ivector-dir $online_ivector_dir \ --left-context $left_context --right-context $right_context \ - --valid-left-context $valid_left_context --valid-right-context $valid_right_context \ - --priors-left-context $valid_left_context --priors-right-context $valid_right_context $frame_subsampling_opt \ - --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg ${degs_opts} \ + $frame_subsampling_opt \ + --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \ $train_data_dir $lang ${srcdir}_ali${affix} $lats_dir $srcdir/final.mdl $degs_dir ; fi fi @@ -225,7 +218,7 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" --use-frame-shift false \ - --truncate-deriv-weights $truncate_deriv_weights --adjust-priors false \ + --adjust-priors false \ --modify-learning-rates false \ ${degs_dir} $dir ; fi @@ -237,7 +230,7 @@ if [ $stage -le 5 ]; then ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` iter=epoch$x.adj - + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ --acwt 1.0 --post-decode-acwt 10.0 \ --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ @@ -262,4 +255,3 @@ fi exit 0; - diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh index 59bc2c64f70..9dfaa1d4509 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7h.sh @@ -9,7 +9,14 @@ #Final valid prob -0.110475 -0.113102 #Final train prob (xent) -1.20065 -1.2533 #Final valid prob (xent) -1.3313 -1.36743 -# + +# Online decoding +# System tdnn_7h_sp tdnn_7h_sp_online +# WER on train_dev(tg) 13.96 13.95 +# WER on train_dev(fg) 12.86 12.82 +# WER on eval2000(tg) 16.5 16.5 +# WER on eval2000(fg) 14.8 14.8 + set -e # configs for 'chain' @@ -20,6 +27,7 @@ get_egs_stage=-10 speed_perturb=true dir=exp/chain/tdnn_7h # Note: _sp will get added to this if $speed_perturb == true. 
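# (So with the default speed_perturb=true, the model and its decode directories
#  end up under exp/chain/tdnn_7h_sp; the _sp training data is the usual 3-way
#  speed-perturbed set, typically built with factors 0.9, 1.0 and 1.1.)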
decode_iter= +decode_nj=50 # training options num_epochs=4 @@ -36,6 +44,8 @@ remove_egs=false common_egs_dir= xent_regularize=0.1 +test_online_decoding=false # if true, it will run the last decoding stage. + # End configuration section. echo "$0 $@" # Print the command line for logging @@ -193,26 +203,65 @@ if [ $stage -le 14 ]; then utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg fi -decode_suff=sw1_tg + graph_dir=$dir/graph_sw1_tg +iter_opts= +if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi if [ $stage -le 15 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi + rm $dir/.error 2>/dev/null || true for decode_set in train_dev eval2000; do ( steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj 50 --cmd "$decode_cmd" $iter_opts \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_${decode_suff} || exit 1; + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; if $has_fisher; then steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; fi - ) & + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + +if $test_online_decoding && [ $stage -le 16 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor $dir ${dir}_online + + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + # note: we just give it "$decode_set" as it only uses the wav.scp, the + # feature type does not matter. 
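      # (prepare_online_decoding.sh above packages the final model together with
      #  the hires MFCC config and the online i-vector extractor into
      #  ${dir}_online, so this decode recomputes features and i-vectors on the
      #  fly from the wav data; that is why its WERs are expected to track the
      #  offline results closely, as in the comparison table at the top of this
      #  script.)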
+ + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + $graph_dir data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) || touch $dir/.error & done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi fi -wait; + + exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh index 9aec95393d1..793b40f7fe3 100755 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_7j.sh @@ -126,12 +126,12 @@ if [ $stage -le 12 ]; then # the first splicing is moved before the lda layer, so no splicing here relu-renorm-layer name=tdnn1 dim=768 - tdnn-relu-renorm-layer name=tdnn2 splice-indexes=-1,0,1 dim=768 subset-dim=384 - tdnn-relu-renorm-layer name=tdnn3 splice-indexes=-1,0,1 dim=768 subset-dim=384 - tdnn-relu-renorm-layer name=tdnn4 splice-indexes=-3,0,3 dim=768 subset-dim=384 - tdnn-relu-renorm-layer name=tdnn5 splice-indexes=-3,0,3 dim=768 subset-dim=384 - tdnn-relu-renorm-layer name=tdnn6 splice-indexes=-3,0,3 dim=768 subset-dim=384 - tdnn-relu-renorm-layer name=tdnn7 splice-indexes=-3,0,3 dim=768 subset-dim=384 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=768 subset-dim=384 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=768 subset-dim=384 ## adding the layers for chain branch relu-renorm-layer name=prefinal-chain input=tdnn7 dim=768 target-rms=0.5 diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh new file mode 100755 index 00000000000..12b63b7e96a --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_blstm_1a.sh @@ -0,0 +1,245 @@ +#!/bin/bash + +# tdnn_blstm_1a is same as blstm_6k, but with the initial tdnn layers +# blstm_6k : num-parameters: 41155430 +# tdnn_blstm_1a : num-parameters: 53688166 + +# local/chain/compare_wer_general.sh blstm_6l_sp blstm_6k_sp +# System blstm_6k_sp tdnn_blstm_1a_sp +# WER on train_dev(tg) 13.25 12.95 +# WER on train_dev(fg) 12.27 11.98 +# WER on eval2000(tg) 15.7 15.5 +# WER on eval2000(fg) 14.5 14.1 +# Final train prob -0.052 -0.041 +# Final valid prob -0.080 -0.072 +# Final train prob (xent) -0.743 -0.629 +# Final valid prob (xent) -0.8816 -0.8091 + +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_blstm_1a # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_dir_affix= + +# training options +leftmost_questions_truncate=-1 +chunk_width=150 +chunk_left_context=40 +chunk_right_context=40 +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=0 + +# decode options +extra_left_context=50 +extra_right_context=50 +frames_per_chunk= + +remove_egs=false +common_egs_dir= + +affix= +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. 
./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=blstm1-forward input=tdnn3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm1-backward input=tdnn3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm2-forward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm2-backward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + fast-lstmp-layer name=blstm3-forward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + fast-lstmp-layer name=blstm3-backward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
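  # (Worked through for this script: xent_regularize=0.025, so the
  #  learning-rate factor computed above is 0.5 / 0.025 = 20; because the xent
  #  objective itself is weighted by 0.025, the xent output layer effectively
  #  trains at 0.025 * 20 = 0.5 times the base learning rate, independent of the
  #  chosen value of xent_regularize.)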
+ output-layer name=output-xent input=Append(blstm3-forward, blstm3-backward) output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64 \ + --trainer.frames-per-iter 1200000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + +decode_suff=sw1_tg +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + iter_opts= + if [ ! 
-z $decode_iter ]; then + iter_opts=" --iter $decode_iter " + fi + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --frames-per-chunk "$frames_per_chunk" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_${decode_suff} || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_dir_affix:+_$decode_dir_affix}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh old mode 100644 new mode 100755 index b305c57b6ab..d71301eb102 --- a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1c.sh @@ -5,15 +5,19 @@ # it's faster. See PR #1243 on github, and issue #1237. # This used to be called run_tdnn_fastlstm_1b.sh. -#System tdnn_lstm_1a_ld5 tdnn_lstm_1b_ld5 tdnn_lstm_1c_ld5 -#WER on train_dev(tg) 13.42 13.00 12.91 -#WER on train_dev(fg) 12.42 12.03 11.98 -#WER on eval2000(tg) 15.7 15.3 15.2 -#WER on eval2000(fg) 14.2 13.9 13.8 -#Final train prob -0.0538088 -0.056294 -0.050 -#Final valid prob -0.0800484 -0.0813322 -0.092 -#Final train prob (xent) -0.7603 -0.777787 -0.756 -#Final valid prob (xent) -0.949909 -0.939146 -0.983 +## note: the last column below was this run on Feb 1 2017, in the +## shortcut branch. Results are a bit worse, but I believe this is just +## random noise or a little bit of mean-regression. + +#System tdnn_lstm_1a_ld5_sp tdnn_lstm_1b_ld5_sp tdnn_lstm_1c_ld5_sp tdnn_lstm_1c_ld5_sp +#WER on train_dev(tg) 13.42 13.00 12.91 13.17 +#WER on train_dev(fg) 12.42 12.03 11.98 12.25 +#WER on eval2000(tg) 15.7 15.3 15.2 15.4 +#WER on eval2000(fg) 14.2 13.9 13.8 14.1 +#Final train prob -0.0538088 -0.056294 -0.050 -0.046 +#Final valid prob -0.0800484 -0.0813322 -0.092 -0.073 +#Final train prob (xent) -0.7603 -0.777787 -0.756 -0.749 +#Final valid prob (xent) -0.949909 -0.939146 -0.983 -0.980 set -e diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh new file mode 100755 index 00000000000..22c7d2e582d --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1d.sh @@ -0,0 +1,288 @@ +#!/bin/bash + + +# run_tdnn_lstm_1d.sh is like run_tdnn_lstm_1c.sh but making +# various kaldi-5.1-related upgrades to the script: +# change chunk-width to be variable, add extra_left_context_initial=0 +# and extra_right_context_final=0; add looped decoding. +# Also changed frames-per-iter from 1.2 million to 1.5 million... this +# might have been a mistake, trying 1 million in 1f to see if this matters. + +# The comparison below is with a version of the 1c system that was run at about +# the same time. The degradation in log-likelihood and xent prob is likely because +# now on average the chunk-size is slightly smaller than before (150 -> 136); +# possibly the change in extra-(left,right) context has a similar effect +# (or maybe it's just because the validation and train-subset examples have changed). 
+ + +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1c_ld5_sp tdnn_lstm_1d_sp +# System tdnn_lstm_1c_ld5_sp tdnn_lstm_1d_sp +# WER on train_dev(tg) 13.17 12.90 +# [looped:] 13.01 +# WER on train_dev(fg) 12.25 11.90 +# [looped:] 12.13 +# WER on eval2000(tg) 15.4 15.7 +# [looped:] 15.7 +# WER on eval2000(fg) 14.1 14.2 +# [looped:] 14.4 +# Final train prob -0.046 -0.064 +# Final valid prob -0.073 -0.088 +# Final train prob (xent) -0.749 -0.836 +# Final valid prob (xent) -0.9084 -0.9631 + +# run_tdnn_lstm_1c.sh is like run_tdnn_lstm_1b.sh but using the +# new 'fast-lstm' layer. Results are slightly improved, plus +# it's faster. See PR #1243 on github, and issue #1237. +# This used to be called run_tdnn_fastlstm_1b.sh. + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1d # Note: _sp will get added to this if $speed_perturb == true. +decode_iter=final + +# training options +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 
input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done +fi +wait; + + + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh new file mode 100755 index 00000000000..6987757757a --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -0,0 +1,327 @@ +#!/bin/bash + +# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but +# trying the change of xent_regularize from 0.025 (which was an +# unusual value) to the more usual 0.01. + +# There seems to be no consistent difference in WER. Inconclusive. +# However I may keep 0.01 just for consistency with other setups. +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1d_sp tdnn_lstm_1e_sp +# System tdnn_lstm_1d_sp tdnn_lstm_1e_sp +# WER on train_dev(tg) 12.90 12.74 +# [looped:] 13.01 12.93 +# WER on train_dev(fg) 11.90 11.70 +# [looped:] 12.13 12.09 +# WER on eval2000(tg) 15.7 15.7 +# [looped:] 15.7 15.9 +# WER on eval2000(fg) 14.2 14.3 +# [looped:] 14.4 14.6 +# Final train prob -0.064 -0.066 +# Final valid prob -0.088 -0.087 +# Final train prob (xent) -0.836 -0.931 +# Final valid prob (xent) -0.9631 -1.0279 + +# Online decoding +# System tdnn_lstm_1e_sp_online tdnn_lstm_1e_sp +# WER on train_dev(tg) 12.93 12.74 +# WER on train_dev(fg) 12.05 11.87 +# WER on eval2000(tg) 15.5 15.4 +# WER on eval2000(fg) 14.0 13.8 + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1e # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_nj=50 + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. 
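# (frames_per_chunk=140,100,160 below asks the example generation to use a mix
#  of chunk widths rather than one fixed width, with the first value, 140, used
#  as the primary width at decode time; this is the "variable chunk-width"
#  change described in run_tdnn_lstm_1d.sh, and is why the average chunk size
#  quoted there is somewhat below the old fixed value of 150.)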
+frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
+ output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +iter_opts= +if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi + +if [ $stage -le 15 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
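+  # (Roughly speaking, decode_looped.sh evaluates the network continuously,
+  # carrying the recurrent state forward from chunk to chunk instead of relying
+  # on extra-left-context, so for forward-recurrent models like this TDNN-LSTM
+  # it should closely match what online decoding produces.)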
+ rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in looped decoding" + exit 1 + fi +fi + +if $test_online_decoding && [ $stage -le 17 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor $dir ${dir}_online + + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + # note: we just give it "$decode_set" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --acwt 1.0 --post-decode-acwt 10.0 \ + $graph_dir data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in online decoding" + exit 1 + fi +fi + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh new file mode 100755 index 00000000000..90e179379e4 --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1f.sh @@ -0,0 +1,300 @@ +#!/bin/bash + +# run_tdnn_lstm_1f.sh is like run_tdnn_lstm_1e.sh but +# reducing the frames-per-iter from 1.5 million to 1 million, +# since the time per iter was more than usual (about 5 minutes). + +# Below, the WER seems to get a little worse, although the optimization +# is improved slightly. There seems to be more train/valid difference. +# see also 1i. + +# exp/chain/tdnn_lstm_1f_sp: num-iters=392 nj=3..16 num-params=39.6M dim=40+100->6042 combine=-0.080->-0.073 xent:train/valid[260,391,final]=(-1.06,-0.903,-0.916/-1.13,-1.03,-1.04) logprob:train/valid[260,391,final]=(-0.084,-0.064,-0.065/-0.100,-0.091,-0.090) + +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1f_sp +# System tdnn_lstm_1e_sp tdnn_lstm_1f_sp +# WER on train_dev(tg) 12.74 13.23 +# [looped:] 12.93 13.27 +# WER on train_dev(fg) 11.70 12.17 +# [looped:] 12.09 12.42 +# WER on eval2000(tg) 15.7 16.1 +# [looped:] 15.9 16.2 +# WER on eval2000(fg) 14.3 14.6 +# [looped:] 14.6 14.7 +# Final train prob -0.066 -0.065 +# Final valid prob -0.087 -0.090 +# Final train prob (xent) -0.931 -0.916 +# Final valid prob (xent) -1.0279 -1.0359 + +# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but +# trying the change of xent_regularize from 0.025 (which was an +# unusual value) to the more usual 0.01. + +# WER is worse but this seems to be due to more complete optimization +# (train better, valid worse). 
Looks like we may be overtraining. +# +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1f_sp +# System tdnn_lstm_1e_sp tdnn_lstm_1f_sp +# WER on train_dev(tg) 12.74 13.23 +# [looped:] 12.93 13.27 +# WER on train_dev(fg) 11.70 12.17 +# [looped:] 12.09 12.42 +# WER on eval2000(tg) 15.7 16.1 +# [looped:] 15.9 16.2 +# WER on eval2000(fg) 14.3 14.6 +# [looped:] 14.6 14.7 +# Final train prob -0.066 -0.065 +# Final valid prob -0.087 -0.090 +# Final train prob (xent) -0.931 -0.916 +# Final valid prob (xent) -1.0279 -1.0359 + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1f # Note: _sp will get added to this if $speed_perturb == true. +decode_iter=final + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch 
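+  # (include-log-softmax=false because the 'chain' objective is computed on the
+  # unnormalized network output; output-delay delays the output by label_delay
+  # frames (5 here), which gives this unidirectional model a little future
+  # context to work with.)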
+ output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b1{1,2,3,4}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1000000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
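+  # (--self-loop-scale 1.0 is the standard setting for 'chain' graphs; together
+  # with --acwt 1.0 --post-decode-acwt 10.0 at decode time it keeps the lattice
+  # scores on roughly the same scale as conventional systems.)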
+ utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done +fi +wait; + + + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh new file mode 100755 index 00000000000..cb73f020e3e --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1g.sh @@ -0,0 +1,282 @@ +#!/bin/bash + +# 1g is like 1e, but reducing decay-time from 20 to 15, to see if +# it reduces the difference between regular and looped decoding. +# +# There doesn't seem to be a very consistent difference betwen 1e and 1g. + + +# exp/chain/tdnn_lstm_1g_sp: num-iters=262 nj=3..16 num-params=39.6M dim=40+100->6042 combine=-0.083->-0.076 xent:train/valid[173,261,final]=(-1.09,-0.929,-0.938/-1.15,-1.04,-1.05) logprob:train/valid[173,261,final]=(-0.089,-0.066,-0.067/-0.102,-0.089,-0.090) + +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1g_sp +# System tdnn_lstm_1e_sp tdnn_lstm_1g_sp +# WER on train_dev(tg) 12.74 13.03 +# [looped:] 12.93 12.98 +# WER on train_dev(fg) 11.70 12.02 +# [looped:] 12.09 12.13 +# WER on eval2000(tg) 15.7 15.6 +# [looped:] 15.9 15.9 +# WER on eval2000(fg) 14.3 14.1 +# [looped:] 14.6 14.4 +# Final train prob -0.066 -0.067 +# Final valid prob -0.087 -0.090 +# Final train prob (xent) -0.931 -0.938 +# Final valid prob (xent) -1.0279 -1.0473 + + +# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but +# trying the change of xent_regularize from 0.025 (which was an +# unusual value) to the more usual 0.01. + + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1g # Note: _sp will get added to this if $speed_perturb == true. 
+decode_iter=final + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir=exp/chain/tdnn_lstm_1d_sp/egs + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=15" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. 
we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. 
Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done +fi +wait; + + + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh new file mode 100755 index 00000000000..b12be22ce3d --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1h.sh @@ -0,0 +1,279 @@ +#!/bin/bash + +# 1h is like 1e, but reducing the hidden-dims from 1024 to 880. + +# Does not seem to help; both train and valid probs get worse by about +# the same amount, and WER is overall just slightly worse. Maybe 1024 +# was approximately optimal. + +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1h_sp +# System tdnn_lstm_1e_sp tdnn_lstm_1h_sp +# WER on train_dev(tg) 12.74 13.06 +# [looped:] 12.93 13.17 +# WER on train_dev(fg) 11.70 12.13 +# [looped:] 12.09 12.27 +# WER on eval2000(tg) 15.7 15.7 +# [looped:] 15.9 15.9 +# WER on eval2000(fg) 14.3 14.4 +# [looped:] 14.6 14.5 +# Final train prob -0.066 -0.069 +# Final valid prob -0.087 -0.091 +# Final train prob (xent) -0.931 -0.967 +# Final valid prob (xent) -1.0279 -1.0631 + +# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but +# trying the change of xent_regularize from 0.025 (which was an +# unusual value) to the more usual 0.01. + + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1h # Note: _sp will get added to this if $speed_perturb == true. +decode_iter=final + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir=exp/chain/tdnn_lstm_1d_sp/egs + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
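+  # (The '7000' argument below is the target number of tree leaves; stage 12
+  # reads the resulting num-pdfs back with 'tree-info' to set num_targets,
+  # i.e. the dimension of the two output layers.)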
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=880 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=880 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=880 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=880 recurrent-projection-dim=220 non-recurrent-projection-dim=220 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=880 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=880 + fast-lstmp-layer name=fastlstm2 cell-dim=880 recurrent-projection-dim=220 non-recurrent-projection-dim=220 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=880 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=880 + fast-lstmp-layer name=fastlstm3 cell-dim=880 recurrent-projection-dim=220 non-recurrent-projection-dim=220 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
+ for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done +fi +wait; + + + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh new file mode 100755 index 00000000000..7e05834c1fb --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1i.sh @@ -0,0 +1,300 @@ +#!/bin/bash + +# run_tdnn_lstm_1i.sh is like run_tdnn_lstm_1{e,f}.sh but +# with a different frames-per-iter: 2 million, vs. 1.5 million +# (1e) and 1 million (1f) + +# Results are inconclusive regarding comparison with 1e: it's [0.3 worse, 0.1 +# better, 0.2 worse, same, 0.2 better, 0.2 better, 0.3 better, 0.3 better] on +# the different conditions. There is less train/valid difference and worse +# train prob [the trends of valid and train probs are consistent as we change +# the frames-per-iter]. + +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1{e,f,i}_sp 2>/dev/null +# System tdnn_lstm_1e_sp tdnn_lstm_1f_sp tdnn_lstm_1i_sp +# WER on train_dev(tg) 12.74 13.23 13.08 +# [looped:] 12.93 13.27 13.00 +# WER on train_dev(fg) 11.70 12.17 11.97 +# [looped:] 12.09 12.42 12.08 +# WER on eval2000(tg) 15.7 16.1 15.5 +# [looped:] 15.9 16.2 15.7 +# WER on eval2000(fg) 14.3 14.6 14.0 +# [looped:] 14.6 14.7 14.3 +# Final train prob -0.066 -0.065 -0.069 +# Final valid prob -0.087 -0.090 -0.088 +# Final train prob (xent) -0.931 -0.916 -0.947 +# Final valid prob (xent) -1.0279 -1.0359 -1.0419 + +# run_tdnn_lstm_1e.sh is like run_tdnn_lstm_1d.sh but +# trying the change of xent_regularize from 0.025 (which was an +# unusual value) to the more usual 0.01. + +# WER is worse but this seems to be due to more complete optimization +# (train better, valid worse). Looks like we may be overtraining. +# +# local/chain/compare_wer_general.sh --looped tdnn_lstm_1e_sp tdnn_lstm_1f_sp +# System tdnn_lstm_1e_sp tdnn_lstm_1f_sp +# WER on train_dev(tg) 12.74 13.23 +# [looped:] 12.93 13.27 +# WER on train_dev(fg) 11.70 12.17 +# [looped:] 12.09 12.42 +# WER on eval2000(tg) 15.7 16.1 +# [looped:] 15.9 16.2 +# WER on eval2000(fg) 14.3 14.6 +# [looped:] 14.6 14.7 +# Final train prob -0.066 -0.065 +# Final valid prob -0.087 -0.090 +# Final train prob (xent) -0.931 -0.916 +# Final valid prob (xent) -1.0279 -1.0359 + + +set -e + +# configs for 'chain' +stage=0 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1i # Note: _sp will get added to this if $speed_perturb == true. +decode_iter=final + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +# End configuration section. 
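+# Everything above can be overridden from the command line once
+# utils/parse_options.sh is sourced below; for instance (illustrative), running
+#   local/chain/tuning/run_tdnn_lstm_1i.sh --stage 14
+# would skip training and go straight to graph building and decoding.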
+echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b1{1,2,3,4}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 2000000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 15 ]; then + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" --iter $decode_iter \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
+ for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 50 --cmd "$decode_cmd" --iter $decode_iter \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done +fi +wait; + + + +exit 0; diff --git a/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh new file mode 100755 index 00000000000..6a6a4ba30e1 --- /dev/null +++ b/egs/swbd/s5c/local/chain/tuning/run_tdnn_lstm_1j.sh @@ -0,0 +1,311 @@ +#!/bin/bash + +# same as 1e but with delay of -1 +# System tdnn_lstm_1e_sp tdnn_lstm_1j_sp +# WER on train_dev(tg) 12.74 12.95 +# WER on train_dev(fg) 11.70 12.01 +# WER on eval2000(tg) 15.7 15.3 +# WER on eval2000(fg) 14.3 13.9 +# Final train prob -0.066 -0.066 +# Final valid prob -0.087 -0.089 +# Final train prob (xent) -0.931 -0.921 +# Final valid prob (xent) -1.0279 -1.0363 +# exp/chain/tdnn_lstm_1j_sp/: num-iters=262 nj=3..16 num-params=39.6M dim=40+100->6067 combine=-0.076->-0.074 xent:train/valid[173,261,final]=(-1.08,-0.925,-0.921/-1.17,-1.04,-1.04) logprob:train/valid[173,261,final]=(-0.085,-0.067,-0.066/-0.103,-0.090,-0.089) + +set -e + +# configs for 'chain' +stage=12 +train_stage=-10 +get_egs_stage=-10 +speed_perturb=true +dir=exp/chain/tdnn_lstm_1j # Note: _sp will get added to this if $speed_perturb == true. +decode_iter= +decode_nj=50 + +# training options +xent_regularize=0.01 +self_repair_scale=0.00001 +label_delay=5 + +chunk_left_context=40 +chunk_right_context=0 +# we'll put chunk-left-context-initial=0 and chunk-right-context-final=0 +# directly without variables. +frames_per_chunk=140,100,160 + +# (non-looped) decoding options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +# we'll put extra-left-context-initial=0 and extra-right-context-final=0 +# directly without variables. + + +remove_egs=false +common_egs_dir= + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 11 ]; then + # Build a tree using our new topology. 
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 12 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + [ -z $num_targets ] && { echo "$0: error getting num-targets"; exit 1; } + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=20" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + + # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults + fast-lstmp-layer name=fastlstm1 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-1 $lstm_opts + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm2 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + relu-renorm-layer name=tdnn6 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn7 input=Append(-3,0,3) dim=1024 + fast-lstmp-layer name=fastlstm3 cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts + + ## adding the layers for chain branch + output-layer name=output input=fastlstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=fastlstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.num-chunk-per-minibatch 64,32 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --egs.stage $get_egs_stage \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width $frames_per_chunk \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --feat-dir data/${train_set}_hires \ + --tree-dir $treedir \ + --lat-dir exp/tri4_lats_nodup$suffix \ + --dir $dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg +fi + + +graph_dir=$dir/graph_sw1_tg +iter_opts= +if [ ! -z $decode_iter ]; then + iter_opts=" --iter $decode_iter " +fi + +if [ $stage -le 15 ]; then + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode.sh --num-threads 4 \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj 25 --cmd "$decode_cmd" $iter_opts \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 \ + --extra-right-context-final 0 \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + +if [ $stage -le 16 ]; then + # looped decoding. Note: this does not make sense for BLSTMs or other + # backward-recurrent setups, and for TDNNs and other non-recurrent there is no + # point doing it because it would give identical results to regular decoding. 
+ rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + steps/nnet3/decode_looped.sh \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg_looped || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg}_looped || exit 1; + fi + ) & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in looped decoding" + exit 1 + fi +fi + +if $test_online_decoding && [ $stage -le 17 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + $lang exp/nnet3/extractor $dir ${dir}_online + + rm $dir/.error 2>/dev/null || true + for decode_set in train_dev eval2000; do + ( + # note: we just give it "$decode_set" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" $iter_opts \ + --acwt 1.0 --post-decode-acwt 10.0 \ + $graph_dir data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1; + fi + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in online decoding" + exit 1 + fi +fi + +exit 0; diff --git a/egs/swbd/s5c/local/eval1997_data_prep.sh b/egs/swbd/s5c/local/eval1997_data_prep.sh index f49ac551192..e29da13deee 100755 --- a/egs/swbd/s5c/local/eval1997_data_prep.sh +++ b/egs/swbd/s5c/local/eval1997_data_prep.sh @@ -5,13 +5,13 @@ # To be run from one directory above this script. -# The input is a directory name containing the 1997 Hub5 english evaluation +# The input is a directory name containing the 1997 Hub5 english evaluation # test set and transcripts, which is LDC2002S10 # e.g. see # http://www.ldc.upenn.edu/Catalog/CatalogEntry.jsp?catalogId=LDC2002S10 # # It is assumed that the transcripts are in a subdirectory called transcr -# However, we download the STM from NIST site: +# However, we download the STM from NIST site: # ftp://jaguar.ncsl.nist.gov/lvcsr/mar97/eval/hub5e97.english.980618.stm if [ $# -ne 1 ]; then @@ -26,7 +26,7 @@ sdir=$1 [ ! -d $sdir/transcr ] \ && echo Expecting directory $sdir/transcr to be present && exit 1; -. path.sh +. path.sh dir=data/local/eval1997 mkdir -p $dir @@ -40,7 +40,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1; awk -v sph2pipe=$sph2pipe '{ - printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); + printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2); }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1; #side A - channel 1, side B - channel 2 @@ -49,8 +49,8 @@ awk -v sph2pipe=$sph2pipe '{ # segments file format is: utt-id side-id start-time end-time, e.g.: # sw02001-A_000098-001156 sw02001-A 0.98 11.56 pem=$sdir/speech/97_hub5e.pem -[ ! 
-f $pem ] && echo "No such file $pem" && exit 1; -# pem file has lines like: +[ ! -f $pem ] && echo "$0: No such file $pem" && exit 1; +# pem file has lines like: # en_4156 A unknown_speaker 301.85 302.48 # There is one line in the 97_hub5e.pem with an extra : on the channel # sw_10022 B: unknown_speaker 281.21 284.37 -- the : is removed @@ -64,7 +64,7 @@ grep -v ';;' $pem | sed -e 's?:??g' \ printf "%s %s %.2f %.2f\n", utt, spk, start, end; }' \ | sort -u > $dir/segments - + # Download the STM and GLM files: ( cd $dir rm -f stm glm @@ -78,9 +78,9 @@ grep -v ';;' $pem | sed -e 's?:??g' \ # stm file has lines like: -# en_4042 A en_4042_A 227.71 232.26 BEANS RIGHT THAT IS WHY I SAID BEANS -# One of the segments (sw_10022-B_028120-028437) is removed since it is not -# scored and does not show up in the pem file. +# en_4042 A en_4042_A 227.71 232.26 BEANS RIGHT THAT IS WHY I SAID BEANS +# One of the segments (sw_10022-B_028120-028437) is removed since it is not +# scored and does not show up in the pem file. grep -v ';;' $dir/hub5e97.english.980618.stm \ | awk '{ spk=$1"-"$2; @@ -96,7 +96,7 @@ grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text # create an utt2spk file that assumes each conversation side is # a separate speaker. -awk '{print $1,$2;}' $dir/segments > $dir/utt2spk +awk '{print $1,$2;}' $dir/segments > $dir/utt2spk utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt # cp $dir/segments $dir/segments.tmp @@ -116,4 +116,3 @@ done echo Data preparation and formatting completed for Eval 2000 echo "(but not MFCC extraction)" - diff --git a/egs/swbd/s5c/local/eval2000_data_prep.sh b/egs/swbd/s5c/local/eval2000_data_prep.sh index 8d7e1f7ed6e..4c34061a120 100755 --- a/egs/swbd/s5c/local/eval2000_data_prep.sh +++ b/egs/swbd/s5c/local/eval2000_data_prep.sh @@ -1,11 +1,11 @@ #!/bin/bash -# Hub-5 Eval 2000 data preparation +# Hub-5 Eval 2000 data preparation # Author: Arnab Ghoshal (Jan 2013) # To be run from one directory above this script. -# The input is two directory names (possibly the same) containing the +# The input is two directory names (possibly the same) containing the # 2000 Hub5 english evaluation test set and transcripts, which are # respectively: LDC2002S09 LDC2002T43 # e.g. see @@ -35,7 +35,7 @@ tdir=$2 [ ! -d $tdir/reference ] \ && echo Expecting directory $tdir/reference to be present && exit 1; -. path.sh +. path.sh dir=data/local/eval2000 mkdir -p $dir @@ -49,7 +49,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1; awk -v sph2pipe=$sph2pipe '{ - printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); + printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2); }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1; #side A - channel 1, side B - channel 2 @@ -58,8 +58,8 @@ awk -v sph2pipe=$sph2pipe '{ # segments file format is: utt-id side-id start-time end-time, e.g.: # sw02001-A_000098-001156 sw02001-A 0.98 11.56 pem=$sdir/english/hub5e_00.pem -[ ! -f $pem ] && echo "No such file $pem" && exit 1; -# pem file has lines like: +[ ! 
-f $pem ] && echo "$0: No such file $pem" && exit 1; +# pem file has lines like: # en_4156 A unknown_speaker 301.85 302.48 # we ignore the warnings below for now, although they seem to indicate some problems @@ -72,7 +72,7 @@ grep -v ';;' $pem \ | sort -u | local/extend_segments.pl 0.1 > $dir/segments # stm file has lines like: -# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER +# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER # TODO(arnab): We should really be lowercasing this since the Edinburgh # recipe uses lowercase. This is not used in the actual scoring. grep -v ';;' $tdir/reference/hub5e00.english.000405.stm \ @@ -94,10 +94,10 @@ cp $tdir/reference/en20000405_hub5.glm $dir/glm echo "Segments from pem file and stm file do not match." && exit 1; grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text - + # create an utt2spk file that assumes each conversation side is # a separate speaker. -awk '{print $1,$2;}' $dir/segments > $dir/utt2spk +awk '{print $1,$2;}' $dir/segments > $dir/utt2spk utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt # cp $dir/segments $dir/segments.tmp diff --git a/egs/swbd/s5c/local/nnet3/compare_wer_general.sh b/egs/swbd/s5c/local/nnet3/compare_wer_general.sh old mode 100644 new mode 100755 index 11742173120..7cf42c9ae04 --- a/egs/swbd/s5c/local/nnet3/compare_wer_general.sh +++ b/egs/swbd/s5c/local/nnet3/compare_wer_general.sh @@ -1,48 +1,99 @@ #!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/nnet3/compare_wer_general.sh tdnn_c_sp tdnn_d_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/nnet3/compare_wer_general.sh tdnn_d_sp tdnn_d_sp_smbr:1 tdnn_d_sp_smbr:2 ... + +echo "# $0 $*"; # print command line. + + echo -n "# System " -for x in $*; do printf "% 10s" $x; done +for x in $*; do printf " % 9s" $x; done echo + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. +# If called with a colon-free name, like: +# set_names tdnn_a_sp +# it will set dir=exp/nnet3/tdnn_a_sp and epoch_suffix="" +# If called with something like: +# set_names tdnn_d_sp_smbr:3 +# it will set dir=exp/nnet3/tdnn_d_sp_smbr and epoch_suffix="epoch3" +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + name=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + dirname=exp/nnet3/$name + if [ -z $epoch ]; then + epoch_suffix="" + else + used_epochs=true + epoch_suffix=_epoch${epoch} + fi +} + + echo -n "# WER on train_dev(tg) " for x in $*; do - wer=$(grep WER exp/nnet3/${x}_sp/decode_train_dev_hires_sw1_tg/wer_* | utils/best_wer.sh | awk '{print $2}') + set_names $x + # note: the '*' in the directory name is because there + # is _hires_ in there for the cross-entropy systems, and + # nothing for the sequence trained systems. 
+ wer=$(grep WER $dirname/decode_train_dev*sw1_tg$epoch_suffix/wer_* | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo echo -n "# WER on train_dev(fg) " for x in $*; do - wer=$(grep WER exp/nnet3/${x}_sp/decode_train_dev_hires_sw1_fsh_fg/wer_* | utils/best_wer.sh | awk '{print $2}') + set_names $x + wer=$(grep WER $dirname/decode_train_dev*sw1_fsh_fg$epoch_suffix/wer_* | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo echo -n "# WER on eval2000(tg) " for x in $*; do - wer=$(grep Sum exp/nnet3/${x}_sp/decode_eval2000_hires_sw1_tg/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') + set_names $x + wer=$(grep Sum $dirname/decode_eval2000*sw1_tg$epoch_suffix/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo echo -n "# WER on eval2000(fg) " for x in $*; do - wer=$(grep Sum exp/nnet3/${x}_sp/decode_eval2000_hires_sw1_fsh_fg/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') + set_names $x + wer=$(grep Sum $dirname/decode_eval2000*sw1_fsh_fg$epoch_suffix/score*/*ys | grep -v swbd | utils/best_wer.sh | awk '{print $2}') printf "% 10s" $wer done echo +if $used_epochs; then + # we don't print the probs in this case. + exit 0 +fi + echo -n "# Final train prob " for x in $*; do - prob=$(grep log-likelihood exp/nnet3/${x}_sp/log/compute_prob_train.combined.log | awk '{print $8}') - printf "% 10s" $prob + set_names $x + prob=$(grep log-likelihood $dirname/log/compute_prob_train.combined.log | awk '{print $8}') + printf "% 10.3f" $prob done echo echo -n "# Final valid prob " for x in $*; do - prob=$(grep log-likelihood exp/nnet3/${x}_sp/log/compute_prob_valid.combined.log | awk '{print $8}') - printf "% 10s" $prob + set_names $x + prob=$(grep log-likelihood $dirname/log/compute_prob_valid.combined.log | awk '{print $8}') + printf "% 10.3f" $prob done echo - diff --git a/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh b/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh index 99f6a31e708..ba751ad8732 100755 --- a/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh +++ b/egs/swbd/s5c/local/nnet3/run_blstm_discriminative.sh @@ -2,7 +2,9 @@ set -o pipefail set -e -# this is run_discriminative.sh + +# Caution: this script is out of date, it does not use the +# refactored discriminative training script with get_degs.sh. # This script does discriminative training on top of CE BLSTM system. # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, @@ -44,7 +46,6 @@ dir=${srcdir}_${criterion} ## Egs options frames_per_eg=150 frames_overlap_per_eg=30 -truncate_deriv_weights=10 ## Nnet training options effective_learning_rate=0.0000125 @@ -53,10 +54,6 @@ num_jobs_nnet=4 num_epochs=4 regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -adjust_priors=true # May need to be set to false - # because it does not help in some setups -modify_learning_rates=true -last_layer_factor=0.1 ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. 
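# The name:epoch convention introduced in compare_wer_general.sh above can be
# exercised on its own; the following is a minimal standalone sketch, assuming
# the example system names from that script's usage comment and the exp/nnet3
# prefix it hardcodes:
for sys in tdnn_d_sp tdnn_d_sp_smbr:3; do
  name=$(echo $sys | cut -d: -f1)
  epoch=$(echo $sys | cut -s -d: -f2)   # empty when there is no colon
  if [ -z "$epoch" ]; then epoch_suffix=""; else epoch_suffix=_epoch${epoch}; fi
  echo "dir=exp/nnet3/$name epoch_suffix='$epoch_suffix'"
done
# which prints:
#   dir=exp/nnet3/tdnn_d_sp epoch_suffix=''
#   dir=exp/nnet3/tdnn_d_sp_smbr epoch_suffix='_epoch3'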
@@ -138,15 +135,12 @@ if [ -z "$degs_dir" ]; then # have a higher maximum num-jobs if if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi - degs_opts="--determinize true --minimize true --remove-output-symbols true --remove-epsilons true --collapse-transition-ids true" - steps/nnet3/get_egs_discriminative.sh \ --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \ - --adjust-priors $adjust_priors \ --online-ivector-dir $online_ivector_dir \ --left-context $left_context --right-context $right_context \ $frame_subsampling_opt \ - --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg ${degs_opts} \ + --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \ $train_data_dir data/lang ${srcdir}_ali $lats_dir $srcdir/final.mdl $degs_dir ; fi fi @@ -159,8 +153,6 @@ if [ $stage -le 4 ]; then --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \ --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \ --regularization-opts "$regularization_opts" \ - --truncate-deriv-weights $truncate_deriv_weights --adjust-priors $adjust_priors \ - --modify-learning-rates $modify_learning_rates --last-layer-factor $last_layer_factor \ ${degs_dir} $dir fi @@ -170,7 +162,7 @@ if [ $stage -le 5 ]; then for decode_set in train_dev eval2000; do ( num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - iter=epoch$x.adj + iter=epoch${x}_adj steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ --online-ivector-dir exp/nnet3/ivectors_${decode_set} $context_opts \ @@ -195,4 +187,3 @@ fi exit 0; - diff --git a/egs/swbd/s5c/local/nnet3/run_ivector_common.sh b/egs/swbd/s5c/local/nnet3/run_ivector_common.sh index 109396ed72e..b64d3e468df 100755 --- a/egs/swbd/s5c/local/nnet3/run_ivector_common.sh +++ b/egs/swbd/s5c/local/nnet3/run_ivector_common.sh @@ -13,6 +13,9 @@ speed_perturb=true mkdir -p nnet3 # perturbed data preparation train_set=train_nodup + +if [ -e data/rt03 ]; then maybe_rt03=rt03; else maybe_rt03= ; fi + if [ "$speed_perturb" == "true" ]; then if [ $stage -le 1 ]; then #Although the nnet will be trained by high resolution data, we still have to perturbe the normal data to get the alignment @@ -59,18 +62,7 @@ if [ $stage -le 3 ]; then for dataset in $train_set train_100k_nodup; do utils/copy_data_dir.sh data/$dataset data/${dataset}_hires - # scale the waveforms, this is useful as we don't use CMVN - data_dir=data/${dataset}_hires - cat $data_dir/wav.scp | python -c " -import sys, os, subprocess, re, random -scale_low = 1.0/8 -scale_high = 2.0 -for line in sys.stdin.readlines(): - if len(line.strip()) == 0: - continue - print '{0} sox --vol {1} -t wav - -t wav - |'.format(line.strip(), random.uniform(scale_low, scale_high)) -"| sort -k1,1 -u > $data_dir/wav.scp_scaled || exit 1; - mv $data_dir/wav.scp_scaled $data_dir/wav.scp + utils/data/perturb_data_dir_volume.sh data/${dataset}_hires steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \ --cmd "$train_cmd" data/${dataset}_hires exp/make_hires/$dataset $mfccdir; @@ -81,7 +73,7 @@ for line in sys.stdin.readlines(): utils/fix_data_dir.sh data/${dataset}_hires; done - for dataset in eval2000 train_dev rt03; do + for dataset in eval2000 train_dev $maybe_rt03; do # Create MFCCs for the eval set utils/copy_data_dir.sh data/$dataset data/${dataset}_hires steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 --mfcc-config conf/mfcc_hires.conf \ @@ -128,12 +120,12 @@ if 
[ $stage -le 8 ]; then # having a larger number of speakers is helpful for generalization, and to # handle per-utterance decoding well (iVector starts at zero). - steps/online/nnet2/copy_data_dir.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires + utils/data/modify_speaker_info.sh --utts-per-spk-max 2 data/${train_set}_hires data/${train_set}_max2_hires steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ data/${train_set}_max2_hires exp/nnet3/extractor exp/nnet3/ivectors_$train_set || exit 1; - for data_set in eval2000 train_dev rt03; do + for data_set in eval2000 train_dev $maybe_rt03; do steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \ data/${data_set}_hires exp/nnet3/extractor exp/nnet3/ivectors_$data_set || exit 1; done diff --git a/egs/swbd/s5c/local/nnet3/run_tdnn_disc.sh b/egs/swbd/s5c/local/nnet3/run_tdnn_disc.sh new file mode 120000 index 00000000000..e4d47deb7a4 --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/run_tdnn_disc.sh @@ -0,0 +1 @@ +tuning/run_tdnn_d_disc.sh \ No newline at end of file diff --git a/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh b/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh deleted file mode 100755 index f422aa92e38..00000000000 --- a/egs/swbd/s5c/local/nnet3/run_tdnn_discriminative.sh +++ /dev/null @@ -1,186 +0,0 @@ -#!/bin/bash - -set -o pipefail -set -e -# this is run_discriminative.sh - -# This script does discriminative training on top of CE nnet3 system. -# note: this relies on having a cluster that has plenty of CPUs as well as GPUs, -# since the lattice generation runs in about real-time, so takes of the order of -# 1000 hours of CPU time. -# -. cmd.sh - - -stage=0 -train_stage=-10 # can be used to start training in the middle. -get_egs_stage=-10 -use_gpu=true # for training -cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like denlats, - # alignments and degs). - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -srcdir=exp/nnet3/nnet_ms_a -train_data_dir=data/train_nodup_sp_hires -online_ivector_dir=exp/nnet3/ivectors_train_nodup_sp -degs_dir= # If provided, will skip the degs directory creation -lats_dir= # If provided, will skip denlats creation - -## Objective options -criterion=smbr -one_silence_class=true - -dir=${srcdir}_${criterion} - -## Egs options -frames_per_eg=150 -frames_overlap_per_eg=30 -truncate_deriv_weights=10 - -## Nnet training options -effective_learning_rate=0.0000125 -max_param_change=1 -num_jobs_nnet=4 -num_epochs=4 -regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options -minibatch_size=64 -adjust_priors=true # May need to be set to false - # because it does not help in some setups -modify_learning_rates=true -last_layer_factor=0.1 - -## Decode options -decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. - -if $use_gpu; then - if ! 
cuda-compiled; then - cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + + output-layer name=output input=tdnn5 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 10 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $ali_dir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +graph_dir=exp/tri4/graph_sw1_tg +if [ $stage -le 11 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh new file mode 100755 index 00000000000..a82b2078acb --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1a.sh @@ -0,0 +1,161 @@ +#!/bin/bash + +# _lfr1a is as _c, but is LFR (low frame rate): it uses triphone chain topology +# with a frame subsampling factor of 3. + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. 
+ +# System tdnn_c tdnn_lfr1a +# WER on train_dev(tg) 17.37 17.25 +# WER on train_dev(fg) 15.94 15.90 +# WER on eval2000(tg) 20.0 20.1 +# WER on eval2000(fg) 18.2 18.5 +# Final train prob -1.43781 -1.32434 +# Final valid prob -1.56895 -1.42206 + + +stage=11 +affix= +train_stage=-10 +has_fisher=true +speed_perturb=true +common_egs_dir= +reporting_email= +remove_egs=true +leftmost_questions_truncate=-1 + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. + steps/nnet3/chain/build_tree.sh --repeat-frames true --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --cmd "$train_cmd" 8400 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,2) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-7,2) dim=1024 + relu-renorm-layer name=tdnn5 dim=1024 + + output-layer name=output input=tdnn5 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $treedir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +echo 3 >$dir/frame_subsampling_factor +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 13 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
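  # A note on the constants (presumably, since the script does not spell this
  # out): the 0.333 below, and the --acwt 0.333 with --post-decode-acwt 3.0 in
  # the decoding stage that follows, all come from the frame subsampling
  # factor of 3, since 1/3 = 0.333; the post-decode scaling multiplies the
  # lattice acoustic scores back up by 3 so that the standard scoring scripts,
  # which assume language-model weights around 10, can be used unchanged.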
+ utils/mkgraph.sh --self-loop-scale 0.333 data/lang_sw1_tg $dir $graph_dir +fi + +if [ $stage -le 14 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 0.333 --post-decode-acwt 3.0 --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh new file mode 100755 index 00000000000..8c80dc3d7ad --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1b.sh @@ -0,0 +1,163 @@ +#!/bin/bash + +# _lfr1b is as _lfr1a, but with one more -3,3 layer (the comparable +# non-LFR system is tdnn_d) + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +# System tdnn_d tdnn_lfr1a tdnn_lfr1b +# WER on train_dev(tg) 16.72 17.25 17.00 +# WER on train_dev(fg) 15.31 15.90 15.57 +# WER on eval2000(tg) 19.2 20.1 19.3 +# WER on eval2000(fg) 17.8 18.5 17.8 +# Final train prob -1.22859 -1.32434 -1.11497 +# Final valid prob -1.354 -1.42206 -1.21105 + + + +stage=0 +affix= +train_stage=-10 +has_fisher=true +speed_perturb=true +common_egs_dir= +reporting_email= +remove_egs=true +leftmost_questions_truncate=-1 + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. 
+ steps/nnet3/chain/build_tree.sh --repeat-frames true --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --cmd "$train_cmd" 8400 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,2) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-3,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-7,2) dim=1024 + relu-renorm-layer name=tdnn6 dim=1024 + + output-layer name=output input=tdnn6 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $treedir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +echo 3 >$dir/frame_subsampling_factor +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 13 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --self-loop-scale 0.333 data/lang_sw1_tg $dir $graph_dir +fi + +if [ $stage -le 14 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 0.333 --post-decode-acwt 3.0 --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh new file mode 100755 index 00000000000..95cdbf7f975 --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh @@ -0,0 +1,162 @@ +#!/bin/bash + +# _lfr1c is as _lfr1a, but uses splicing similar to chain's without changing +# number of layers (comparable non-LFR system is tdnn_e). + +# At this script level we don't support not running on GPU, as it would be painfully slow. +# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false, +# --num-threads 16 and --minibatch-size 128. + +# System tdnn_c tdnn_e tdnn_lfr1c +# WER on train_dev(tg) 17.37 16.75 17.10 +# WER on train_dev(fg) 15.94 15.34 15.74 +# WER on eval2000(tg) 20.0 19.5 19.2 +# WER on eval2000(fg) 18.2 18.0 17.7 +# Final train prob -1.43781 -1.40491 -1.29898 +# Final valid prob -1.56895 -1.55255 -1.43117 + + +stage=11 +affix= +train_stage=-10 +has_fisher=true +speed_perturb=true +common_egs_dir= +#exp/nnet3/tdnn_lfr1b_sp/egs +reporting_email= +remove_egs=true +leftmost_questions_truncate=-1 + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 10 ]; then + # Build a tree using our new topology. This is the critically different + # step compared with other recipes. 
+ steps/nnet3/chain/build_tree.sh --repeat-frames true --frame-subsampling-factor 3 \ + --leftmost-questions-truncate $leftmost_questions_truncate \ + --cmd "$train_cmd" 8400 data/$train_set $lang $ali_dir $treedir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}') + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=1024 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1) dim=1024 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024 + + output-layer name=output input=tdnn5 dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.num-epochs 2 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.0017 \ + --trainer.optimization.final-effective-lrate 0.00017 \ + --egs.dir "$common_egs_dir" \ + --cleanup.remove-egs $remove_egs \ + --cleanup.preserve-model-interval 100 \ + --use-gpu true \ + --feat-dir=data/${train_set}_hires \ + --ali-dir $treedir \ + --lang data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + +fi + +echo 3 >$dir/frame_subsampling_factor +graph_dir=$dir/graph_sw1_tg +if [ $stage -le 13 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --self-loop-scale 0.333 data/lang_sw1_tg $dir $graph_dir +fi + +if [ $stage -le 14 ]; then + for decode_set in train_dev eval2000; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + steps/nnet3/decode.sh --acwt 0.333 --post-decode-acwt 3.0 --nj $num_jobs --cmd "$decode_cmd" \ + --online-ivector-dir exp/nnet3/ivectors_${decode_set} \ + $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_hires_sw1_tg || exit 1; + if $has_fisher; then + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \ + data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \ + $dir/decode_${decode_set}_hires_sw1_{tg,fsh_fg} || exit 1; + fi + ) & + done +fi +wait; +exit 0; + diff --git a/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c_disc.sh b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c_disc.sh new file mode 100755 index 00000000000..734c5a5d1be --- /dev/null +++ b/egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c_disc.sh @@ -0,0 +1,210 @@ +#!/bin/bash + +# This script does discriminative training on top of the CE nnet3 LFR system +# from run_tdnn_lfr1c. To simplify things, this assumes you are using the +# "speed-perturbed" data +# (--speed_perturb true, which is the default) in the baseline run_tdnn_d.sh script. +# +# note: this relies on having a cluster that has plenty of CPUs as well as GPUs, +# since the lattice generation runs in about real-time, so takes of the order of +# 1000 hours of CPU time. + +# Comparing effect of shift: +# System tdnn_lfr1c_sp_smbr:1 tdnn_lfr1c_sp_smbr:2 tdnn_lfr1c_sp_smbr:3 tdnn_lfr1c_sp_fs_smbr:1 tdnn_lfr1c_sp_fs_smbr:2 tdnn_lfr1c_sp_fs_smbr:3 +# WER on train_dev(tg) 16.26 16.11 16.02 16.02 15.77 15.78 +# WER on train_dev(fg) 15.01 14.91 14.80 14.79 14.58 14.50 +# WER on eval2000(tg) 18.9 18.7 18.6 18.6 18.5 18.5 +# WER on eval2000(fg) 17.4 17.2 17.1 17.1 17.0 16.9 + + +set -e +set -uo pipefail + +stage=0 +train_stage=-10 # can be used to start training in the middle. +get_egs_stage=0 +use_gpu=true # for training +cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like + # alignments and degs). +degs_dir= # set this to use preexisting degs. +nj=65 # have a high number of jobs because this could take a while, and we might + # have some stragglers. + +## Objective options +criterion=smbr +one_silence_class=true + +# you can set --disc-affix if you run different configurations, e.g. --disc-affix "_b" +# originally ran with no affix, with effective_learning_rate=0.0000125; +# reran by mistake with no affix with effective_learning_rate=0.000005 [was a bit +# better, see NOTES, but still best after 1st epoch]. +# reran again with affix=slow and effective_learning_rate=0.0000025 +# reran again with affix=slow2 and effective_learning_rate=0.00000125 (this was +# about the best). +# before checking in the script, removed the slow2 affix but left with +# the lowest learning rate. +disc_affix= + +## Egs options. Give quite a few choices of chunk length, +## so it can split utterances without much gap or overlap. +frames_per_eg=300,280,150,120,100 +frames_overlap_per_eg=0 +frames_per_chunk_decoding=200 +## these context options should match the training condition. (chunk_left_context, +## chunk_right_context) +## We set --extra-left-context-initial 0 and --extra-right-context-final 0 +## directly in the script below, but this should also match the training condition. +## Note: extra-left-context and extra-right-context are 0 because this is a TDNN, +## it's not a recurrent model like an LSTM or BLSTM. 
+extra_left_context=0 +extra_right_context=0 + + +## Nnet training options +effective_learning_rate=0.00000125 +max_param_change=1 +num_jobs_nnet=4 +num_epochs=3 +regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options, + # in chain models. +minibatch_size="300=32,16/150=64,32" # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up); + # if chunk size is closer to 150, use mini atch size of 64 (or 32 for mop-up). +shift_feats=false + +## Decode options +decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +srcdir=exp/nnet3/tdnn_lfr1c_sp +graph_dir=$srcdir/graph_sw1_tg +train_data_dir=data/train_nodup_sp_hires +online_ivector_dir=exp/nnet3/ivectors_train_nodup_sp +dir=${srcdir}_${criterion}${disc_affix} + + +if $use_gpu; then + if ! cuda-compiled; then + cat <" + echo "Usage: $0 " + echo "e.g.: $0 /export/corpora/LDC/LDC2007S10" echo "See comments in the script for more details" exit 1 fi @@ -19,7 +20,7 @@ sdir=$1 [ ! -d $sdir/data/references/eval03/english/cts ] \ && echo Expecting directory $tdir/data/references/eval03/english/cts to be present && exit 1; -. path.sh +. path.sh dir=data/local/rt03 mkdir -p $dir @@ -37,7 +38,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1; awk -v sph2pipe=$sph2pipe '{ - printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); + printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2); printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2); }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1; #side A - channel 1, side B - channel 2 @@ -47,7 +48,7 @@ awk -v sph2pipe=$sph2pipe '{ # sw02001-A_000098-001156 sw02001-A 0.98 11.56 #pem=$sdir/english/hub5e_00.pem #[ ! -f $pem ] && echo "No such file $pem" && exit 1; -# pem file has lines like: +# pem file has lines like: # en_4156 A unknown_speaker 301.85 302.48 #grep -v ';;' $pem \ @@ -59,7 +60,7 @@ cat $tdir/*.stm | grep -v ';;' | grep -v inter_segment_gap \ | sort -u > $dir/segments # stm file has lines like: -# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER +# en_4156 A en_4156_A 357.64 359.64 HE IS A POLICE OFFICER # TODO(arnab): We should really be lowercasing this since the Edinburgh # recipe uses lowercase. This is not used in the actual scoring. #grep -v ';;' $tdir/reference/hub5e00.english.000405.stm \ @@ -77,7 +78,7 @@ cat $tdir/*.stm | \ grep -v inter_segment_gap | \ awk '{ printf $1; if ($1==";;") printf(" %s",$2); else printf(($2==1)?" A":" B"); for(n=3;n<=NF;n++) printf(" %s", $n); print ""; }'\ - > $dir/stm + > $dir/stm #$tdir/reference/hub5e00.english.000405.stm > $dir/stm cp $rtroot/data/trans_rules/en20030506.glm $dir/glm @@ -87,10 +88,10 @@ cp $rtroot/data/trans_rules/en20030506.glm $dir/glm echo "Segments from pem file and stm file do not match." && exit 1; grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text - + # create an utt2spk file that assumes each conversation side is # a separate speaker. 
-awk '{print $1,$2;}' $dir/segments > $dir/utt2spk +awk '{print $1,$2;}' $dir/segments > $dir/utt2spk utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt # cp $dir/segments $dir/segments.tmp @@ -110,4 +111,3 @@ done echo Data preparation and formatting completed for RT-03 echo "(but not MFCC extraction)" - diff --git a/egs/swbd/s5c/local/swbd1_prepare_dict.sh b/egs/swbd/s5c/local/swbd1_prepare_dict.sh index 673513806dc..3d9297b5f19 100755 --- a/egs/swbd/s5c/local/swbd1_prepare_dict.sh +++ b/egs/swbd/s5c/local/swbd1_prepare_dict.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Formatting the Mississippi State dictionary for use in Edinburgh. Differs +# Formatting the Mississippi State dictionary for use in Edinburgh. Differs # from the one in Kaldi s5 recipe in that it uses lower-case --Arnab (Jan 2013) # To be run from one directory above this script. @@ -16,7 +16,7 @@ mkdir -p $dir srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text # assume swbd_p1_data_prep.sh was done already. -[ ! -f "$srcdict" ] && echo "No such file $srcdict" && exit 1; +[ ! -f "$srcdict" ] && echo "$0: No such file $srcdict" && exit 1; cp $srcdict $dir/lexicon0.txt || exit 1; patch WOLMANIZED # Also, mispronounced words, e.g. @@ -90,4 +90,3 @@ ln -sf lexicon5.txt lexicon.txt # This is the final lexicon. popd >&/dev/null rm $dir/lexiconp.txt 2>/dev/null echo Prepared input dictionary and phone-sets for Switchboard phase 1. - diff --git a/egs/swbd/s5c/run.sh b/egs/swbd/s5c/run.sh index 0eafe73d046..8b08419007d 100755 --- a/egs/swbd/s5c/run.sh +++ b/egs/swbd/s5c/run.sh @@ -72,11 +72,16 @@ fi # local/eval2000_data_prep.sh /home/dpovey/data/LDC2002S09/hub5e_00 /home/dpovey/data/LDC2002T43 local/eval2000_data_prep.sh /export/corpora2/LDC/LDC2002S09/hub5e_00 /export/corpora2/LDC/LDC2002T43 +# prepare the rt03 data. Note: this isn't 100% necessary for this +# recipe, not all parts actually test using rt03. +local/rt03_data_prep.sh /export/corpora/LDC/LDC2007S10 + # Now make MFCC features. # mfccdir should be some place with a largish disk where you # want to store MFCC features. 
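# (When data/rt03 has not been prepared, $maybe_rt03 below expands to nothing;
# because it is unquoted, the for-loop over "train eval2000 $maybe_rt03" then
# iterates over the first two sets only. The same guard is used in
# local/nnet3/run_ivector_common.sh.)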
+if [ -e data/rt03 ]; then maybe_rt03=rt03; else maybe_rt03= ; fi mfccdir=mfcc -for x in train eval2000; do +for x in train eval2000 $maybe_rt03; do steps/make_mfcc.sh --nj 50 --cmd "$train_cmd" \ data/$x exp/make_mfcc/$x $mfccdir steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir diff --git a/egs/tedlium/s5/cmd.sh b/egs/tedlium/s5/cmd.sh index ba7f120e599..4e0263d7cca 100644 --- a/egs/tedlium/s5/cmd.sh +++ b/egs/tedlium/s5/cmd.sh @@ -11,9 +11,9 @@ #export cuda_cmd=run.pl # JHU cluster: -export train_cmd="queue.pl -l arch=*64*" -export decode_cmd="queue.pl -l arch=*64* --mem 4G" -export cuda_cmd="queue.pl -l arch=*64* --gpu 1" +export train_cmd="queue.pl" +export decode_cmd="queue.pl --mem 4G" +export cuda_cmd="queue.pl --gpu 1" host=$(hostname -f) if [ ${host#*.} == "fit.vutbr.cz" ]; then @@ -23,10 +23,10 @@ if [ ${host#*.} == "fit.vutbr.cz" ]; then storage="matylda5" export train_cmd="queue.pl -q $queue -l ram_free=1500M,mem_free=1500M,${storage}=1" export decode_cmd="queue.pl -q $queue -l ram_free=2500M,mem_free=2500M,${storage}=0.5" - export cuda_cmd="queue.pl -q $gpu_queue -l gpu=1" + export cuda_cmd="queue.pl -q $gpu_queue -l gpu=1" elif [ ${host#*.} == "cm.cluster" ]; then # MARCC bluecrab cluster: export train_cmd="slurm.pl --time 4:00:00 " export decode_cmd="slurm.pl --mem 4G --time 4:00:00 " - export cuda_cmd="slurm.pl --gpu 1" + export cuda_cmd="slurm.pl --gpu 1" fi diff --git a/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh b/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh index a5b80505393..8d7393af853 100755 --- a/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh +++ b/egs/tedlium/s5/local/nnet3/run_tdnn_discriminative.sh @@ -4,7 +4,7 @@ # note: this relies on having a cluster that has plenty of CPUs as well as GPUs, # since the lattice generation runs in about real-time, so takes of the order of # 1000 hours of CPU time. -# +# #%WER 13.3 | 507 17792 | 89.1 8.2 2.8 2.4 13.3 86.0 | -0.207 | exp/nnet3/tdnn_smbr/decode_dev_epoch1.adj/score_12_1.0/ctm.filt.filt.sys #%WER 12.4 | 507 17792 | 89.8 7.5 2.7 2.2 12.4 85.4 | -0.305 | exp/nnet3/tdnn_smbr/decode_dev_epoch1.adj_rescore/score_12_1.0/ctm.filt.filt.sys @@ -52,27 +52,22 @@ dir=${srcdir}_${criterion} ## Egs options frames_per_eg=150 frames_overlap_per_eg=30 -truncate_deriv_weights=10 ## Nnet training options effective_learning_rate=0.0000125 max_param_change=1 num_jobs_nnet=4 num_epochs=4 -regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options +regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options minibatch_size=64 -adjust_priors=true # May need to be set to false - # because it does not help in some setups -modify_learning_rates=true -last_layer_factor=0.1 ## Decode options decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. if $use_gpu; then if ! cuda-compiled; then - cat <${dir}_online/sample_decode.sh . cmd.sh data_dir=\$1 # e.g. data/dev_hires (to be prepared by the user, see egs/tedlium/run.sh for examples) -model_dir=\$2 # e.g. exp/nnet2_online/nnet_ms_sp_online (provided in the distribution) +model_dir=\$2 # e.g. 
exp/nnet2_online/nnet_ms_sp_online (provided in the distribution) decode_dir=\$model_dir/\`basename \$data_dir\` num_jobs=\`cat \$data_dir/spk2utt | wc -l\` diff --git a/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh b/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh index aebbd66349a..00b2d29cc88 100755 --- a/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh +++ b/egs/tedlium/s5_r2/local/chain/compare_wer_general.sh @@ -1,64 +1,106 @@ #!/bin/bash -echo $0 $* +# this script is used for comparing decoding results between systems. +# e.g. local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_c_sp exp/chain_cleaned/tdnn_c_sp_smbr:{1,2,3} -echo -n "System " -for x in $*; do printf "% 10s" " $(basename $x)"; done -echo -echo -n "WER on dev(orig) " -for x in $*; do - wer=$(grep Sum $x/decode_dev/score*/*ys | utils/best_wer.sh | awk '{print $2}') - printf "% 10s" $wer -done -echo +echo "# $0 $*" -echo -n "WER on dev(rescored)" -for x in $*; do - wer=$(grep Sum $x/decode_dev_rescore/score*/*ys | utils/best_wer.sh | awk '{print $2}') - printf "% 10s" $wer -done -echo +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi -echo -n "WER on test(orig) " -for x in $*; do - wer=$(grep Sum $x/decode_test/score*/*ys | utils/best_wer.sh | awk '{print $2}') - printf "% 10s" $wer -done +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. +# If called with a colon-free directory name, like: +# set_names exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain_cleaned/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain_cleaned/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done echo -echo -n "WER on test(rescored)" -for x in $*; do - wer=$(grep Sum $x/decode_test_rescore/score*/*ys | utils/best_wer.sh | awk '{print $2}') - printf "% 10s" $wer +strings=("# WER on dev(orig) " "# WER on dev(rescored) " "# WER on test(orig) " "# WER on test(rescored)") + +for n in 0 1 2 3; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) + wer=$(grep Sum $dirname/decode_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) + wer=$(grep Sum $dirname/decode_looped_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi done -echo -echo -n "Final train prob " +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular 
and discriminatively trained systems. +fi + +echo -n "# Final train prob " for x in $*; do prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo -echo -n "Final valid prob " +echo -n "# Final valid prob " for x in $*; do prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo -echo -n "Final train prob (xent)" +echo -n "# Final train prob (xent)" for x in $*; do prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done echo -echo -n "Final valid prob (xent)" +echo -n "# Final valid prob (xent)" for x in $*; do prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}') printf "% 10s" $prob done + echo diff --git a/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm.sh b/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm.sh index 8e647598556..fbc28248491 120000 --- a/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm.sh +++ b/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm.sh @@ -1 +1 @@ -tuning/run_tdnn_lstm_1a.sh \ No newline at end of file +tuning/run_tdnn_lstm_1e.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm_disc.sh b/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm_disc.sh new file mode 120000 index 00000000000..d4268b4185a --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/run_tdnn_lstm_disc.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1e_disc.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh new file mode 100755 index 00000000000..f7a18b4bfcf --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1c.sh @@ -0,0 +1,295 @@ +#!/bin/bash + +# run_tdnn_1c.sh is like run_tdnn_1b.sh but changing chunk-width from 150 to +# '140,110,160', and +# and --trainer.num-chunk-per-minibatch from 128 to 128,64. +# Not better; if anything a little worse. But could possibly be noise. + +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn1b_sp_bi exp/chain_cleaned/tdnn1c_sp_bi +# System tdnn1b_sp_bi tdnn1c_sp_bi +# WER on dev(orig) 9.4 9.8 +# WER on dev(rescored) 8.8 9.0 +# WER on test(orig) 9.6 9.7 +# WER on test(rescored) 9.0 9.2 +# Final train prob -0.0870 -0.0942 +# Final valid prob -0.1147 -0.1108 +# Final train prob (xent) -1.4014 -1.4227 +# Final valid prob (xent) -1.5634 -1.4884 + + +# run_tdnn_1b.sh is like run_tdnn_1a.sh but upgrading to xconfig-based +# config generation. + +# Results (11/29/2016, note, this build is is before the upgrade of the LM +# done in Nov 2016): +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_sp_bi exp/chain_cleaned/tdnn1b_sp_bi +# System tdnn_sp_bi tdnn1b_sp_bi +# WER on dev(orig) 10.3 10.2 +# WER on dev(rescored) 9.8 9.6 +# WER on test(orig) 9.8 9.7 +# WER on test(rescored) 9.3 9.2 +# Final train prob -0.0918 -0.0928 +# Final valid prob -0.1190 -0.1178 +# Final train prob (xent) -1.3572 -1.4666 +# Final valid prob (xent) -1.4415 -1.5473 + + +## how you run this (note: this assumes that the run_tdnn.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn.sh + +# without cleanup: +# local/chain/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run the corresponding non-chain nnet3 system +# (local/nnet3/run_tdnn.sh), you may want to run with --stage 14. 
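# (On the chunk-width and num-chunk-per-minibatch change described at the top:
# presumably '140,110,160' means the egs are dumped with a mix of those three
# chunk lengths rather than a single fixed 150, and '128,64' means the trainer
# may also form smaller 64-chunk minibatches, in the same "smaller size for
# mop-up" sense used for minibatch_size in the discriminative scripts.)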
+ +# This script is like run_tdnn_1a.sh except it uses an xconfig-based mechanism +# to get the configuration. + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1c #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1,2) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-6,-3,0) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... 
this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width '140,110,160' \ + --trainer.num-chunk-per-minibatch '128,64' \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
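  # (Relative to the regular decoding stage above, the only changes appear to
  # be decode_looped.sh itself, the hardcoded --frames-per-chunk 30, the
  # decode_looped_* output directory names, and dropping --num-threads, which
  # as noted is not supported here.)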
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh new file mode 100755 index 00000000000..99921a9bf61 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_1d.sh @@ -0,0 +1,256 @@ +#!/bin/bash + + +# run_tdnn_1d.sh is like run_tdnn_1b.sh but using 10 times the self-repair +# scale on the 1st TDNN layer. +# seems a little better- I wouldn't say it was significant normally, but +# it definitely stops the 1st TDNN layer from having under/over-saturated +# neurons. + +# exp/chain_cleaned/tdnn1b_sp_bi: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3606 combine=-0.10->-0.10 xent:train/valid[167,252,final]=(-1.47,-1.40,-1.40/-1.61,-1.57,-1.56) logprob:train/valid[167,252,final]=(-0.096,-0.087,-0.087/-0.119,-0.115,-0.115) +# exp/chain_cleaned/tdnn1d_sp_bi: num-iters=253 nj=2..12 num-params=7.0M dim=40+100->3606 combine=-0.10->-0.10 xent:train/valid[167,252,final]=(-1.46,-1.39,-1.39/-1.61,-1.56,-1.55) logprob:train/valid[167,252,final]=(-0.096,-0.088,-0.088/-0.120,-0.115,-0.115) + +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn1b_sp_bi exp/chain_cleaned/tdnn1d_sp_bi +# System tdnn1b_sp_bi tdnn1d_sp_bi +# WER on dev(orig) 9.4 9.5 +# WER on dev(rescored) 8.8 8.6 +# WER on test(orig) 9.6 9.4 +# WER on test(rescored) 9.0 8.9 +# Final train prob -0.0870 -0.0878 +# Final valid prob -0.1147 -0.1152 +# Final train prob (xent) -1.4014 -1.3921 +# Final valid prob (xent) -1.5634 -1.5543 + +# run_tdnn_1b.sh is like run_tdnn_1a.sh but upgrading to xconfig-based +# config generation. + + +## how you run this (note: this assumes that the run_tdnn.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn.sh + +# without cleanup: +# local/chain/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run the corresponding non-chain nnet3 system +# (local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# This script is like run_tdnn_1a.sh except it uses an xconfig-based mechanism +# to get the configuration. + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_affix=1d #affix for TDNN directory, e.g. 
"a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + +if [ $stage -le 17 ]; then + mkdir -p $dir + + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=450 self-repair-scale=1.0e-04 + relu-renorm-layer name=tdnn2 input=Append(-1,0,1) dim=450 + relu-renorm-layer name=tdnn3 input=Append(-1,0,1,2) dim=450 + relu-renorm-layer name=tdnn4 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn5 input=Append(-3,0,3) dim=450 + relu-renorm-layer name=tdnn6 input=Append(-6,-3,0) dim=450 + + ## adding the layers for chain branch + relu-renorm-layer name=prefinal-chain input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + relu-renorm-layer name=prefinal-xent input=tdnn6 dim=450 target-rms=0.5 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ + +fi + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width 150 \ + --trainer.num-chunk-per-minibatch 128 \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh new file mode 100755 index 00000000000..eb2c91dc3d4 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1b.sh @@ -0,0 +1,317 @@ +#!/bin/bash + +# this is as run_tdnn_lstm_1a.sh, but changing +# frames_per_chunk 150 to 140,100,160 +# and --trainer.num-chunk-per-minibatch from 128 to 128,64 +# and adding +# --egs.chunk-left-context-initial=0 +# and --egs.chunk-right-context-final=0 +# See 1e for summary of results. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). 
+# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1b #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
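+  # (Side note, purely for illustration: the fixed left/right context that the
+  # compiled network needs because of the TDNN splicing can be inspected with
+  #   nnet3-am-info $dir/final.mdl | grep -i context
+  # The larger extra-left-context=50 used in the regular decoding stage above
+  # mainly serves to give the LSTM recurrence some history to warm up on; in
+  # looped decoding that state is carried over between chunks, which is why
+  # only --extra-left-context-initial is passed below.)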
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh new file mode 100755 index 00000000000..bb3c5b1a942 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1c.sh @@ -0,0 +1,279 @@ +#!/bin/bash + +# 1c is as 1b, but adding the option --slow-start true. [since removed; it +# takes half the param change from the first two minibatches of each +# job]. The difference is probably just random noise. + + +# local/chain/compare_wer_general.sh exp/chain_cleaned/tdnn_lstm1b_sp_bi exp/chain_cleaned/tdnn_lstm1c_sp_bi +# System tdnn_lstm1b_sp_bi tdnn_lstm1c_sp_bi +# WER on dev(orig) 9.1 8.9 +# WER on dev(rescored) 8.4 8.2 +# WER on test(orig) 8.9 8.9 +# WER on test(rescored) 8.4 8.5 +# Final train prob -0.0621 -0.0620 +# Final valid prob -0.0799 -0.0811 +# Final train prob (xent) -0.8300 -0.8117 +# Final valid prob (xent) -0.9500 -0.9448 + + + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1c #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. 
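+# Note that all the variables in this configuration section can be overridden
+# on the command line, since utils/parse_options.sh maps e.g. --common-egs-dir
+# to the variable common_egs_dir.  For instance (illustrative only), to have
+# this script dump its own egs instead of reusing the 1b egs set just below:
+#   local/chain/tuning/run_tdnn_lstm_1c.sh --common-egs-dir ""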
+common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
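+  # (A concrete instance of the arithmetic: with the default xent_regularize=0.1
+  # set at the top of this script, the learning_rate_factor computed above is
+  # 0.5 / 0.1 = 5.0, so the output-xent layer below ends up with
+  # learning-rate-factor=5.0.)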
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.slow-start true \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh new file mode 100755 index 00000000000..4be28a4ca97 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1d.sh @@ -0,0 +1,313 @@ +#!/bin/bash + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. 
note: it +# uses egs from 1b, remember to remove that before I commit. +# See 1e for summary of results. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1d #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). 
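+  # (For reference: the output of the command below is a set of compressed
+  # lattice archives, $lat_dir/lat.*.gz.  Purely as an illustration, one of
+  # them can be viewed in text form with
+  #   lattice-copy "ark:gunzip -c $lat_dir/lat.1.gz |" ark,t:- | head
+  # which can be handy for sanity-checking the alignment lattices.)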
+ # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=40 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=40 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=40 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
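+  # (If the model is already trained and you only want to redo this looped
+  # decoding pass, the stage mechanism lets you re-enter the script here; for
+  # example, illustratively:
+  #   local/chain/tuning/run_tdnn_lstm_1d.sh --stage 21
+  # since all the earlier blocks are guarded by "[ $stage -le N ]" with N < 21,
+  # they will simply be skipped.)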
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh new file mode 100755 index 00000000000..e56946c1b54 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e.sh @@ -0,0 +1,395 @@ +#!/bin/bash + +# 1e is as 1d, but reducing decay-time from 40 to 20. + +# The following table shows comparison of various decay-time values, +# namely: [b:unset=infinity, f:80, d:40, e:20, g:10, g2:5]. +# note: the g2 script is not checked in. +# There is no clear trend on the non-looped decoding, but looped decoding seems +# to improve as decay-time is decreased. We end up recommending decay-time=20, +# as by then we get all the improvement on looped decoding, and it's the +# most conservative setting with which we can get this improvement (although +# actually it seems fine to use an even smaller decay-time). + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{b,f,d,e,g,g2}_sp_bi + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1b_sp_bi exp/chain_cleaned/tdnn_lstm1f_sp_bi exp/chain_cleaned/tdnn_lstm1d_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1g_sp_bi exp/chain_cleaned/tdnn_lstm1g2_sp_bi +# System tdnn_lstm1b_sp_bi tdnn_lstm1f_sp_bi tdnn_lstm1d_sp_bi tdnn_lstm1e_sp_bi tdnn_lstm1g_sp_bi tdnn_lstm1g2_sp_bi +# WER on dev(orig) 9.1 8.8 9.0 9.0 9.0 9.4 +# [looped:] 9.4 9.3 9.2 9.0 8.9 9.4 +# WER on dev(rescored) 8.4 8.2 8.4 8.4 8.4 8.7 +# [looped:] 8.8 8.7 8.6 8.4 8.3 8.7 +# WER on test(orig) 8.9 9.0 8.9 8.8 8.8 9.3 +# [looped:] 9.3 9.3 9.0 8.8 8.8 9.2 +# WER on test(rescored) 8.4 8.6 8.3 8.4 8.4 8.9 +# [looped:] 8.7 8.9 8.5 8.3 8.4 8.8 +# Final train prob -0.0621 -0.0631 -0.0595 -0.0648 -0.0689 -0.0739 +# Final valid prob -0.0799 -0.0802 -0.0823 -0.0827 -0.0890 -0.0963 +# Final train prob (xent) -0.8300 -0.8295 -0.8129 -0.8372 -0.8610 -0.8792 +# Final valid prob (xent) -0.9500 -0.9662 -0.9589 -0.9497 -0.9982 -1.0256 + + +# the following table compares the 'online' decoding with regular and looped +# decoding. online decoding is a little better than either (possibly due to +# using slightly later iVectors). +# +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi{,_online} 2>/dev/null +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi_online +# System tdnn_lstm1e_sp_bi tdnn_lstm1e_sp_bi_online +# WER on dev(orig) 9.0 8.8 +# [looped:] 9.0 +# WER on dev(rescored) 8.4 8.4 +# [looped:] 8.4 +# WER on test(orig) 8.8 8.8 +# [looped:] 8.8 +# WER on test(rescored) 8.4 8.4 +# [looped:] 8.3 + + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. 
note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +frames_per_chunk=140,100,160 +# decode options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 + + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1e #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir= # you can set this to use previously dumped egs. +remove_egs=true + +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! 
cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs "$remove_egs" \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results very much (unlike + # regular decoding)... [it will affect them slightly due to differences in the + # iVector extraction; probably smaller will be worse as it sees less of the future, + # but in a real scenario, long chunks will introduce excessive latency]. 
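+  # (A minimal sketch of how one could probe that chunk-size trade-off on the
+  # dev set; the decode directory suffix is made up for this illustration and
+  # all other options match the real invocation below:
+  #   for fpc in 20 30 50; do
+  #     steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \
+  #       --acwt 1.0 --post-decode-acwt 10.0 --frames-per-chunk $fpc \
+  #       --extra-left-context-initial $extra_left_context_initial \
+  #       --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_dev_hires \
+  #       --scoring-opts "--min-lmwt 5" \
+  #       $dir/graph data/dev_hires $dir/decode_looped_dev_fpc${fpc}
+  #   done )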
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if $test_online_decoding && [ $stage -le 22 ]; then + # note: if the features change (e.g. you add pitch features), you will have to + # change the options of the following command line. + steps/online/nnet3/prepare_online_decoding.sh \ + --mfcc-config conf/mfcc_hires.conf \ + data/lang_chain exp/nnet3${nnet3_affix}/extractor ${dir} ${dir}_online + + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + # note: we just give it "$dset" as it only uses the wav.scp, the + # feature type does not matter. + + steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \ + --extra-left-context-initial $extra_left_context_initial \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset} ${dir}_online/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}_online/decode_${dset} ${dir}_online/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e_disc.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e_disc.sh new file mode 100755 index 00000000000..0d64c75aea8 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1e_disc.sh @@ -0,0 +1,264 @@ +#!/bin/bash + +# This script does discriminative training on top of the 1e chain system. To +# simplify things, this assumes you are using the "cleaned" data (since this is +# generally better), i.e. it won't work if you used options to run_tdnn_lstm_1e.sh +# to use the non-cleaned data. +# +# note: this relies on having a cluster that has plenty of CPUs as well as GPUs, +# since the alignment and the lattice generation/egs-dumping takes quite a bit +# of CPU time. + + +# Below is with 0.00002 and last_layer_factor=0.5 +# this is the setting we're leaving in the script, but the discriminative training +# is not really helping. Maybe we should try the frame-shifted version. 
+# steps/info/nnet3_disc_dir_info.pl exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbroutslow2 +# exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbroutslow2:num-jobs=4;effective-lrate=2e-05;last-layer-factor=0.50;iters-per-epoch=138;epoch[0,1,2]:train-objf=[0.94,0.96,0.97],valid-objf=[0.95,0.96,0.96],train-counts=[0.24,0.12,0.10],valid-counts=[0.28,0.20,0.17] +# b01:s5_r2: steps/info/nnet3_disc_dir_info.pl exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbroutslow + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbroutslow2:{1,2} +# System tdnn_lstm1e_sp_bi tdnn_lstm1e_sp_bi_smbroutslow2:1 tdnn_lstm1e_sp_bi_smbroutslow2:2 +# WER on dev(orig) 9.0 8.9 8.9 +# [looped:] 9.0 8.9 8.9 +# WER on dev(rescored) 8.4 8.3 8.4 +# [looped:] 8.4 8.3 8.4 +# WER on test(orig) 8.8 8.7 8.8 +# [looped:] 8.8 8.8 8.8 +# WER on test(rescored) 8.4 8.3 8.4 +# [looped:] 8.3 8.4 8.5 + + + +# Below is with 0.00002 and last_layer_factor=1.0. +# b01:s5_r2: steps/info/nnet3_disc_dir_info.pl exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr +# exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr:num-jobs=4;lrate=2e-05;iters-per-epoch=138;epoch[0,1,2]:train-objf=[0.94,0.96,0.97],valid-objf=[0.95,0.96,0.96],train-counts=[0.24,0.12,0.09],valid-counts=[0.28,0.19,0.16] +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr:{1,2} +# System tdnn_lstm1e_sp_bi tdnn_lstm1e_sp_bi_smbr:1 tdnn_lstm1e_sp_bi_smbr:2 +# WER on dev(orig) 9.0 8.8 8.9 +# [looped:] 9.0 8.9 8.9 +# WER on dev(rescored) 8.4 8.3 8.4 +# [looped:] 8.4 8.3 8.4 +# WER on test(orig) 8.8 8.8 8.9 +# [looped:] 8.8 8.8 8.9 +# WER on test(rescored) 8.4 8.4 8.5 +# [looped:] 8.3 8.4 8.5 + + +set -e +set -uo pipefail + +stage=1 +train_stage=-10 # can be used to start training in the middle. +get_egs_stage=0 +use_gpu=true # for training +cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like + # alignments and degs). +degs_dir= # set this to use preexisting degs. +nj=400 # have a high number of jobs because this could take a while, and we might + # have some stragglers. +# you can set disc_affix if you run different configurations, e.g. --disc-affix "_b" +disc_affix= + + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +srcdir=exp/chain_cleaned/tdnn_lstm1e_sp_bi +graph_dir=$srcdir/graph +train_data_dir=data/train_cleaned_sp_hires_comb +online_ivector_dir=exp/nnet3_cleaned/ivectors_train_cleaned_sp_hires_comb + +## Objective options +criterion=smbr +one_silence_class=true + +dir=${srcdir}_${criterion}${disc_affix} + +## Egs options. Give quite a few choices of chunk length, +## so it can split utterances without much gap or overlap. +frames_per_eg=300,280,150,120,100 +frames_overlap_per_eg=0 +frames_per_chunk_egs=200 # frames-per-chunk for decoding in alignment and + # denlat decoding. +frames_per_chunk_decoding=140 # frames-per-chunk for decoding when we test + # the models. +## these context options should match the training condition. (chunk_left_context, +## chunk_right_context) +## We set --extra-left-context-initial 0 and --extra-right-context-final 0 +## directly in the script below, but this should also match the training condition. +extra_left_context=40 +extra_right_context=0 + + + +## Nnet training options +effective_learning_rate=0.00002 +max_param_change=1 +num_jobs_nnet=4 +num_epochs=2 +regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options, + # in chain models. 
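+# For illustration only (the script leaves regularization_opts empty by
+# default): to mirror the regularization used when the 1e chain system that
+# this script starts from was trained, one could set something like
+#   regularization_opts="--xent-regularize=0.1 --l2-regularize=0.00005"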
+last_layer_factor=0.5 # have the output layer train slower than the others.. this can + # be helpful. +minibatch_size="300=32,16/150=64,32" # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up); + # if chunk size is closer to 150, use mini atch size of 64 (or 32 for mop-up). + + +## Decode options +decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. + +if $use_gpu; then + if ! cuda-compiled; then + cat </dev/null || true + + for x in `seq $decode_start_epoch $num_epochs`; do + for decode_set in dev test; do + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + iter=epoch$x + # We don't test the iter "epoch${x}_adj", although it's computed, + # because prior-adjustment doesn't make sense for chain models + # and it degrades the results. + ( + steps/nnet3/decode_looped.sh \ + --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3_cleaned/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/${decode_set}_hires $dir/decode_looped_${decode_set}_${iter} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${decode_set}_hires \ + ${dir}/decode_looped_${decode_set}_${iter} ${dir}/decode_looped_${decode_set}_${iter}_rescore || exit 1 + ) || touch $dir/.error & + done + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +wait; + +if [ $stage -le 6 ] && $cleanup; then + # if you run with "--cleanup true --stage 6" you can clean up. + # actually, keep the alignments in case we need them later.. they're slow to + # create, and quite big. + # rm ${srcdir}_ali/ali.*.gz || true + + steps/nnet2/remove_egs.sh ${srcdir}_degs || true +fi + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh new file mode 100755 index 00000000000..3ed14f30956 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1f.sh @@ -0,0 +1,315 @@ +#!/bin/bash + +# 1f is as 1d, but increasing decay-time from 40 to 80. [see also 1e, at 20.] +# see 1e for summary of results. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). 
+# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1f #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=80 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=80 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=80 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh new file mode 100755 index 00000000000..aff39a04025 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1g.sh @@ -0,0 +1,318 @@ +#!/bin/bash + +####################### +# 1g is as 1e, but reducing decay-time further from 20 to 10. +# see 1e for summary of results. + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
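+# Any of the variables below can be overridden from the command line once
+# utils/parse_options.sh has been sourced (further down), e.g. a hypothetical
+# invocation; pick values appropriate to your cluster:
+#   local/chain/tuning/run_tdnn_lstm_1g.sh --stage 17 --train-stage -10 --decode-nj 38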
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1g #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=10 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=10 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=10 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true
+ for dset in dev test; do
+ (
+ steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \
+ --acwt 1.0 --post-decode-acwt 10.0 \
+ --extra-left-context-initial $extra_left_context_initial \
+ --frames-per-chunk 30 \
+ --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+ --scoring-opts "--min-lmwt 5 " \
+ $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1;
+ steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+ data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1
+ ) || touch $dir/.error &
+ done
+ wait
+ if [ -f $dir/.error ]; then
+ echo "$0: something went wrong in decoding"
+ exit 1
+ fi
+fi
+
+
+exit 0
diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh
new file mode 100755
index 00000000000..8ffd43f27bc
--- /dev/null
+++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1h.sh
@@ -0,0 +1,316 @@
+#!/bin/bash
+
+#######################
+# 1h is as 1e, but increasing decay-time from 20 to 30.
+# 1e is as 1b, but reducing decay-time from 40 to 20.
+
+# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it
+# uses egs from 1b, remember to remove that before I commit.
+
+# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi
+# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091)
+
+# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below
+# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had
+# better results. Note: these results are not with the updated LM (the LM data-prep
+# for this setup was changed in Nov 2016 but this was with an older directory).
+#
+# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi
+# System                    lstm1d_sp_bi  lstm1e_sp_bi  tdnn_lstm1a_sp_bi
+# WER on dev(orig)          10.3          10.7          9.7
+# WER on dev(rescored)      9.8           10.1          9.3
+# WER on test(orig)         9.7           9.8           9.1
+# WER on test(rescored)     9.2           9.4           8.7
+# Final train prob          -0.0812       -0.0862       -0.0625
+# Final valid prob          -0.1049       -0.1047       -0.0910
+# Final train prob (xent)   -1.1334       -1.1763       -0.8518
+# Final valid prob (xent)   -1.2263       -1.2427       -0.9972
+
+## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here;
+## otherwise call it directly in its location).
+# by default, with cleanup:
+# local/chain/run_tdnn_lstm.sh
+
+# without cleanup:
+# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" &
+
+# note, if you have already run one of the non-chain nnet3 systems
+# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14.
+
+# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly
+# standard LSTM, except that some TDNN layers were added in between the
+# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but
+# this isn't exactly copied from there.
+
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1h #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=30 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=30 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=30 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh new file mode 100755 index 00000000000..62497ca59ff --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1i.sh @@ -0,0 +1,337 @@ +#!/bin/bash + +# 1i is as 1e, but adding boundary-offset. No clear effect. +# +# the 3 columns below are: baseline; boundary-offset with that component +# learning with 10x the normal learning rate; boundary-offset with +# regular learning rate. There seems no clear benefit from this +# idea. Reverting the code changes that supported it; +# see ~dpovey/patches/lstm_boundary.patch + + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1i_sp_bi exp/chain_cleaned/tdnn_lstm1i_sp_bi.orig_learning_rate +# System tdnn_lstm1e_sp_bi tdnn_lstm1i_sp_bi tdnn_lstm1i_sp_bi.orig_learning_rate +# WER on dev(orig) 9.0 9.1 8.9 +# [looped:] 9.0 9.0 9.0 +# WER on dev(rescored) 8.4 8.3 8.3 +# [looped:] 8.4 8.2 8.2 +# WER on test(orig) 8.8 8.9 8.9 +# [looped:] 8.8 8.9 8.9 +# WER on test(rescored) 8.4 8.4 8.4 +# [looped:] 8.3 8.4 8.4 +# Final train prob -0.0648 -0.0625 -0.0644 +# Final valid prob -0.0827 -0.0833 -0.0855 +# Final train prob (xent) -0.8372 -0.8129 -0.8286 +# Final valid prob (xent) -0.9497 -0.9558 -0.9641 + + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). 
+# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1i #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 boundary-offset=true + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 boundary-offset=true + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 boundary-offset=true + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh new file mode 100755 index 00000000000..c9a57f0ab4d --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1j.sh @@ -0,0 +1,334 @@ +#!/bin/bash + +# 1j is as 1e, but adding self-repair-scale=1.0e-04 on 1st tdnn layer [default is 1e-5]. +# It's definitely more effective in preventing under or over-saturated ReLUs, but +# it's not clear that there is any other benefit. + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,j}_sp_bi +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1j_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1j_sp_bi +# WER on dev(orig) 9.0 9.1 +# [looped:] 9.0 9.1 +# WER on dev(rescored) 8.4 8.5 +# [looped:] 8.4 8.5 +# WER on test(orig) 8.8 9.0 +# [looped:] 8.8 9.1 +# WER on test(rescored) 8.4 8.6 +# [looped:] 8.3 8.5 +# Final train prob -0.0648 -0.0646 +# Final valid prob -0.0827 -0.0835 +# Final train prob (xent) -0.8372 -0.8296 +# Final valid prob (xent) -0.9497 -0.9597 + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). 
+# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1j #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 self-repair-scale=1.0e-04 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 decay-time=20 delay=-3 + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh new file mode 100755 index 00000000000..ab9d6ce6342 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1k.sh @@ -0,0 +1,339 @@ +#!/bin/bash + +# 1k is as 1e, but introducing a dropout schedule. + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,k,l,m}_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1k_sp_bi tdnn_lstm1l_sp_bi tdnn_lstm1m_sp_bi +# WER on dev(orig) 9.0 8.7 8.9 9.0 +# [looped:] 9.0 8.6 8.9 8.9 +# WER on dev(rescored) 8.4 7.9 8.2 8.2 +# [looped:] 8.4 7.8 8.2 8.3 +# WER on test(orig) 8.8 8.8 8.9 8.9 +# [looped:] 8.8 8.7 8.8 8.8 +# WER on test(rescored) 8.4 8.3 8.2 8.5 +# [looped:] 8.3 8.3 8.3 8.4 +# Final train prob -0.0648 -0.0693 -0.0768 -0.0807 +# Final valid prob -0.0827 -0.0854 -0.0943 -0.0931 +# Final train prob (xent) -0.8372 -0.8848 -0.9371 -0.9807 +# Final valid prob (xent) -0.9497 -0.9895 -1.0546 -1.0629 + + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). 
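+# (To make the soft link point here, something like the following should work,
+# run from s5_r2 and assuming the usual egs directory layout:
+#   ln -sf tuning/run_tdnn_lstm_1k.sh local/chain/run_tdnn_lstm.sh )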
+# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1k #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + # note: the value of the dropout-proportion is not important, as it's + # controlled by the dropout schedule; what's important is that we set it. 
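+  # For reference, the schedule given to train.py below,
+  #   --trainer.dropout-schedule='0,0@0.20,0.7@0.5,0@0.75,0'
+  # is (as we understand it) a piecewise-linear function of the fraction of
+  # training data processed: dropout stays at 0 until 20% of training, rises
+  # linearly to 0.7 at the half-way point, falls back to 0 by 75%, and then
+  # stays at 0 to the end.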
+ lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-per-frame=true" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.7@0.5,0@0.75,0' \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
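+  # (The [looped:] rows in the WER comparison at the top of this file come
+  # from these decode_looped_* directories.)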
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh new file mode 100755 index 00000000000..e09df86558a --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1l.sh @@ -0,0 +1,330 @@ +#!/bin/bash + + +# 1l is as 1k, but having the dropout end at the end of training, not @0.75. + +# see run_tdnn_lstm_1k.sh for results. + + +# 1k is as 1e, but introducing a dropout schedule. + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
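+# All of the options below can be overridden on the command line via
+# utils/parse_options.sh (sourced further down); for example, a hypothetical
+# rerun of just the network-training and decoding stages might look like:
+#   local/chain/tuning/run_tdnn_lstm_1l.sh --stage 17 --train-stage -10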
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1l #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + # note: the value of the dropout-proportion is not important, as it's + # controlled by the dropout schedule; what's important is that we set it. 
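+  # (With xent_regularize=0.1, the learning_rate_factor computed above works
+  # out to 0.5 / 0.1 = 5.0; see the comments on the xent output layer in the
+  # xconfig below for why that factor is used.)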
+ lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-per-frame=true" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.7@0.5,0' \ + --trainer.optimization.combine-sum-to-one-penalty=0.001 \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh new file mode 100755 index 00000000000..3e75c9fe3e0 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1m.sh @@ -0,0 +1,330 @@ +#!/bin/bash + + +# 1m is as 1l, but having the dropout end at 0.1 +# see run_tdnn_lstm_1k.sh for results. + +# 1l is as 1k, but having the dropout end at the end of training. + +# 1k is as 1e, but introducing a dropout schedule. + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
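+# Note that frames_per_chunk below is a comma-separated list of chunk widths
+# used when dumping egs; the first entry (140) is also what we use at decode
+# time, via frames_per_chunk_primary.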
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1m #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + # note: the value of the dropout-proportion is not important, as it's + # controlled by the dropout schedule; what's important is that we set it. 
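+  # Note that 1m's schedule, '0,0@0.20,0.7@0.5,0.1', does not go back to
+  # zero: it ends at a residual dropout proportion of 0.1 at the end of
+  # training (that is the change relative to 1l; see the comments at the top).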
+ lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-per-frame=true" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.7@0.5,0.1' \ + --trainer.optimization.combine-sum-to-one-penalty=0.001 \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh new file mode 100755 index 00000000000..ed79404f815 --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1n.sh @@ -0,0 +1,340 @@ +#!/bin/bash + +# 1n is as 1k, but maxing out at 0.5, not 0.7. +# 1k is as 1e, but introducing a dropout schedule. + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,k,l,m}_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1k_sp_bi tdnn_lstm1l_sp_bi tdnn_lstm1m_sp_bi +# WER on dev(orig) 9.0 8.7 8.9 9.0 +# [looped:] 9.0 8.6 8.9 8.9 +# WER on dev(rescored) 8.4 7.9 8.2 8.2 +# [looped:] 8.4 7.8 8.2 8.3 +# WER on test(orig) 8.8 8.8 8.9 8.9 +# [looped:] 8.8 8.7 8.8 8.8 +# WER on test(rescored) 8.4 8.3 8.2 8.5 +# [looped:] 8.3 8.3 8.3 8.4 +# Final train prob -0.0648 -0.0693 -0.0768 -0.0807 +# Final valid prob -0.0827 -0.0854 -0.0943 -0.0931 +# Final train prob (xent) -0.8372 -0.8848 -0.9371 -0.9807 +# Final valid prob (xent) -0.9497 -0.9895 -1.0546 -1.0629 + + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). 
+# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1n #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + # note: the value of the dropout-proportion is not important, as it's + # controlled by the dropout schedule; what's important is that we set it. 
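+  # 1n's schedule, '0,0@0.20,0.5@0.5,0@0.75,0', is the same shape as 1k's but
+  # peaks at a dropout proportion of 0.5 rather than 0.7.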
+ lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-per-frame=true" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.5@0.5,0@0.75,0' \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
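+  # (Only --extra-left-context-initial is passed here: since the looped
+  # computation carries the recurrent state forward across chunks, the
+  # per-chunk extra context used in regular decoding above shouldn't be
+  # needed.)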
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh new file mode 100755 index 00000000000..ec97bce3a8b --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1o.sh @@ -0,0 +1,344 @@ +#!/bin/bash + +# 1o is as 1k, but putting the dropout on (c,m), i.e. the output +# of the LstmNonlinearityComponent, which I believe is the same as +# putting it on (i,f) which Gaofeng found worked well in the non-fast Lstm +# component; and using schedule which maxes out at 0.3, not 0.7. +# [note: this was a little worse. turns out it was not the same as +# what gaofeng did because he had separate masks on (i,f). +# note: I've since removed the script-level support for this. + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,k,l,m,n,o}_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1k_sp_bi tdnn_lstm1l_sp_bi tdnn_lstm1m_sp_bi tdnn_lstm1n_sp_bi tdnn_lstm1o_sp_bi +# WER on dev(orig) 9.0 8.7 8.9 9.0 8.8 8.8 +# [looped:] 9.0 8.6 8.9 8.9 8.8 8.9 +# WER on dev(rescored) 8.4 7.9 8.2 8.2 8.1 8.1 +# [looped:] 8.4 7.8 8.2 8.3 8.1 8.2 +# WER on test(orig) 8.8 8.8 8.9 8.9 8.7 8.7 +# [looped:] 8.8 8.7 8.8 8.8 8.7 8.7 +# WER on test(rescored) 8.4 8.3 8.2 8.5 8.3 8.2 +# [looped:] 8.3 8.3 8.3 8.5 8.3 8.2 +# Final train prob -0.0648 -0.0693 -0.0768 -0.0807 -0.0702 -0.0698 +# Final valid prob -0.0827 -0.0854 -0.0943 -0.0931 -0.0836 -0.0858 +# Final train prob (xent) -0.8372 -0.8848 -0.9371 -0.9807 -0.8719 -0.8998 +# Final valid prob (xent) -0.9497 -0.9895 -1.0546 -1.0629 -0.9732 -1.0084 + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). 
+# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +# decode options +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 +frames_per_chunk=140,100,160 +frames_per_chunk_primary=140 + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1o #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + # note: the value of the dropout-proportion is not important, as it's + # controlled by the dropout schedule; what's important is that we set it. + lstmp_opts="decay-time=20 dropout-proportion=0.0 dropout-place=2 dropout-per-frame=true" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --trainer.dropout-schedule='0,0@0.20,0.3@0.5,0@0.75,0' \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir \ + --cleanup=false + # --cleanup=false is temporary while debugging. +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh new file mode 100755 index 00000000000..b3da38e412a --- /dev/null +++ b/egs/tedlium/s5_r2/local/chain/tuning/run_tdnn_lstm_1r.sh @@ -0,0 +1,339 @@ +#!/bin/bash + +# 1r is as 1e, but changing update-period of natural gradient from 4 to 1, +# Not helpful. + +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1{e,r}_sp_bi +# local/chain/compare_wer_general.sh --looped exp/chain_cleaned/tdnn_lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1r_sp_bi +# System tdnn_lstm1e_sp_bi tdnn_lstm1r_sp_bi +# WER on dev(orig) 9.0 9.0 +# [looped:] 9.0 9.1 +# WER on dev(rescored) 8.4 8.5 +# [looped:] 8.4 8.6 +# WER on test(orig) 8.8 9.1 +# [looped:] 8.8 9.0 +# WER on test(rescored) 8.4 8.4 +# [looped:] 8.3 8.5 +# Final train prob -0.0648 -0.0642 +# Final valid prob -0.0827 -0.0838 +# Final train prob (xent) -0.8372 -0.8319 +# Final valid prob (xent) -0.9497 -0.9635 + +# 1e is as 1b, but reducing decay-time from 40 to 20. + +# 1d is as 1b, but adding decay-time=40 to the fast-lstmp-layers. note: it +# uses egs from 1b, remember to remove that before I commit. + +# steps/info/chain_dir_info.pl exp/chain_cleaned/tdnn_lstm1a_sp_bi +# exp/chain_cleaned/tdnn_lstm1a_sp_bi: num-iters=253 nj=2..12 num-params=9.5M dim=40+100->3607 combine=-0.07->-0.07 xent:train/valid[167,252,final]=(-0.960,-0.859,-0.852/-1.05,-0.999,-0.997) logprob:train/valid[167,252,final]=(-0.076,-0.064,-0.062/-0.099,-0.092,-0.091) + +# This is as run_lstm1e.sh except adding TDNN layers in between; also comparing below +# with run_lstm1d.sh which had a larger non-recurrent-projection-dim and which had +# better results. Note: these results are not with the updated LM (the LM data-prep +# for this setup was changed in Nov 2016 but this was with an older directory). +# +# local/chain/compare_wer_general.sh exp/chain_cleaned/lstm1d_sp_bi exp/chain_cleaned/lstm1e_sp_bi exp/chain_cleaned/tdnn_lstm1a_sp_bi +# System lstm1d_sp_bi lstm1e_sp_bi tdnn_lstm1a_sp_bi +# WER on dev(orig) 10.3 10.7 9.7 +# WER on dev(rescored) 9.8 10.1 9.3 +# WER on test(orig) 9.7 9.8 9.1 +# WER on test(rescored) 9.2 9.4 8.7 +# Final train prob -0.0812 -0.0862 -0.0625 +# Final valid prob -0.1049 -0.1047 -0.0910 +# Final train prob (xent) -1.1334 -1.1763 -0.8518 +# Final valid prob (xent) -1.2263 -1.2427 -0.9972 + +## how you run this (note: this assumes that the run_tdnn_lstm.sh soft link points here; +## otherwise call it directly in its location). +# by default, with cleanup: +# local/chain/run_tdnn_lstm.sh + +# without cleanup: +# local/chain/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + +# note, if you have already run one of the non-chain nnet3 systems +# (e.g. 
local/nnet3/run_tdnn.sh), you may want to run with --stage 14. + +# run_tdnn_lstm_1a.sh was modified from run_lstm_1e.sh, which is a fairly +# standard, LSTM, except that some TDNN layers were added in between the +# LSTM layers. I was looking at egs/ami/s5b/local/chain/tuning/run_tdnn_lstm_1i.sh, but +# this isn't exactly copied from there. + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +label_delay=5 +xent_regularize=0.1 +train_set=train_cleaned +gmm=tri3_cleaned # the gmm for the target data +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for nnet3 and chain dirs, e.g. _cleaned +# training options +chunk_left_context=40 +chunk_right_context=0 +chunk_left_context_initial=0 +chunk_right_context_final=0 +frames_per_chunk=140,100,160 +# decode options +frames_per_chunk_primary=$(echo $frames_per_chunk | cut -d, -f1) +extra_left_context=50 +extra_right_context=0 +extra_left_context_initial=0 +extra_right_context_final=0 + + +# The rest are configs specific to this script. Most of the parameters +# are just hardcoded at this level, in the commands below. +train_stage=-10 +tree_affix= # affix for tree directory, e.g. "a" or "b", in case we change the configuration. +tdnn_lstm_affix=1r #affix for TDNN-LSTM directory, e.g. "a" or "b", in case we change the configuration. +common_egs_dir=exp/chain_cleaned/tdnn_lstm1b_sp_bi/egs # you can set this to use previously dumped egs. +remove_egs=true + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <data/lang_chain/topo + fi +fi + +if [ $stage -le 15 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 16 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." 
+ exit 1; + fi + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --leftmost-questions-truncate -1 \ + --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir +fi + + +if [ $stage -le 17 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + tdnn_opts='ng-affine-options="update-period=1"' + lstmp_opts='ng-affine-options="update-period=1" decay-time=20' + output_opts='max-change=1.5 ng-affine-options="update-period=1"' + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=512 $tdnn_opts + relu-renorm-layer name=tdnn2 dim=512 input=Append(-1,0,1) $tdnn_opts + fast-lstmp-layer name=lstm1 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn3 dim=512 input=Append(-3,0,3) $tdnn_opts + relu-renorm-layer name=tdnn4 dim=512 input=Append(-3,0,3) $tdnn_opts + fast-lstmp-layer name=lstm2 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + relu-renorm-layer name=tdnn5 dim=512 input=Append(-3,0,3) $tdnn_opts + relu-renorm-layer name=tdnn6 dim=512 input=Append(-3,0,3) $tdnn_opts + fast-lstmp-layer name=lstm3 cell-dim=512 recurrent-projection-dim=128 non-recurrent-projection-dim=128 delay=-3 $lstmp_opts + + ## adding the layers for chain branch + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets $output_opts + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 18 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/ami-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage $train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize 0.1 \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --egs.chunk-width "$frames_per_chunk" \ + --egs.chunk-left-context "$chunk_left_context" \ + --egs.chunk-right-context "$chunk_right_context" \ + --egs.chunk-left-context-initial "$chunk_left_context_initial" \ + --egs.chunk-right-context-final "$chunk_right_context_final" \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.frames-per-iter 1500000 \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.deriv-truncate-margin 10 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 2 \ + --trainer.optimization.num-jobs-final 12 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.optimization.momentum 0.0 \ + --cleanup.remove-egs "$remove_egs" \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir $lat_dir \ + --dir $dir +fi + + + +if [ $stage -le 19 ]; then + # Note: it might appear that this data/lang_chain directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. + utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph +fi + +if [ $stage -le 20 ]; then + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --num-threads 4 --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial $extra_left_context_initial \ + --extra-right-context-final $extra_right_context_final \ + --frames-per-chunk "$frames_per_chunk_primary" \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +if [ $stage -le 21 ]; then + # 'looped' decoding. we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). 
+ rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --acwt 1.0 --post-decode-acwt 10.0 \ + --extra-left-context-initial $extra_left_context_initial \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $dir/graph data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + +exit 0 diff --git a/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh b/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh new file mode 100755 index 00000000000..da0bb728e69 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/compare_wer.sh @@ -0,0 +1,133 @@ +#!/bin/bash + +# this script is used for comparing decoding results between systems. +# e.g. local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_{c,d}_sp +# For use with discriminatively trained systems you specify the epochs after a colon: +# for instance, +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_c_sp exp/nnet3_cleaned/tdnn_c_sp_smbr:{1,2,3} + + +if [ $# == 0 ]; then + echo "Usage: $0: [--looped] [--online] [ ... ]" + echo "e.g.: $0 exp/nnet3_cleaned/tdnn_{b,c}_sp" + echo "or (with epoch numbers for discriminative training):" + echo "$0 exp/nnet3_cleaned/tdnn_b_sp_disc:{1,2,3}" + exit 1 +fi + +echo "# $0 $*" + +include_looped=false +if [ "$1" == "--looped" ]; then + include_looped=true + shift +fi +include_online=false +if [ "$1" == "--online" ]; then + include_online=true + shift +fi + + + +used_epochs=false + +# this function set_names is used to separate the epoch-related parts of the name +# [for discriminative training] and the regular parts of the name. 
+# If called with a colon-free directory name, like: +# set_names exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr +# it will set dir=exp/chain_cleaned/tdnn_lstm1e_sp_bi_smbr and epoch_infix="" +# If called with something like: +# set_names exp/chain_cleaned/tdnn_d_sp_smbr:3 +# it will set dir=exp/chain_cleaned/tdnn_d_sp_smbr and epoch_infix="_epoch3" + + +set_names() { + if [ $# != 1 ]; then + echo "compare_wer_general.sh: internal error" + exit 1 # exit the program + fi + dirname=$(echo $1 | cut -d: -f1) + epoch=$(echo $1 | cut -s -d: -f2) + if [ -z $epoch ]; then + epoch_infix="" + else + used_epochs=true + epoch_infix=_epoch${epoch} + fi +} + + + +echo -n "# System " +for x in $*; do printf "% 10s" " $(basename $x)"; done +echo + +strings=("# WER on dev(orig) " "# WER on dev(rescored) " "# WER on test(orig) " "# WER on test(rescored)") + +for n in 0 1 2 3; do + echo -n "${strings[$n]}" + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) + wer=$(grep Sum $dirname/decode_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + if $include_looped; then + echo -n "# [looped:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) + wer=$(grep Sum $dirname/decode_looped_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi + if $include_online; then + echo -n "# [online:] " + for x in $*; do + set_names $x # sets $dirname and $epoch_infix + decode_names=(dev${epoch_infix} dev${epoch_infix}_rescore test${epoch_infix} test${epoch_infix}_rescore) + wer=$(grep Sum ${dirname}_online/decode_${decode_names[$n]}/score*/*ys | utils/best_wer.sh | awk '{print $2}') + printf "% 10s" $wer + done + echo + fi +done + + +if $used_epochs; then + exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems. +fi + + +echo -n "# Final train prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid prob " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final train acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo -n "# Final valid acc " +for x in $*; do + prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}') + printf "% 10s" $prob +done +echo + +echo diff --git a/egs/tedlium/s5_r2/local/nnet3/run_ivector_common.sh b/egs/tedlium/s5_r2/local/nnet3/run_ivector_common.sh index b4f2dd3e3b4..16093616b05 100755 --- a/egs/tedlium/s5_r2/local/nnet3/run_ivector_common.sh +++ b/egs/tedlium/s5_r2/local/nnet3/run_ivector_common.sh @@ -21,9 +21,9 @@ num_threads_ubm=32 nnet3_affix=_cleaned # affix for exp/nnet3 directory to put iVector stuff in, so it # becomes exp/nnet3_cleaned or whatever. -. cmd.sh +. ./cmd.sh . ./path.sh -. ./utils/parse_options.sh +. 
utils/parse_options.sh gmm_dir=exp/${gmm} diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh deleted file mode 100755 index 91ba913c183..00000000000 --- a/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh +++ /dev/null @@ -1,108 +0,0 @@ -#!/bin/bash - -# This is the standard "tdnn" system, built in nnet3; this script -# is the version that's meant to run with data-cleanup, that doesn't -# support parallel alignments. - - -# by default, with cleanup: -# local/nnet3/run_tdnn.sh - -# without cleanup: -# local/nnet3/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & - - -set -e -o pipefail -u - -# First the options that are passed through to run_ivector_common.sh -# (some of which are also used in this script directly). -stage=0 -nj=30 -decode_nj=30 -min_seg_len=1.55 -train_set=train_cleaned -gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it - # should have alignments for the specified training data. -num_threads_ubm=32 -nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned -tdnn_affix= #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. - -# Options which are not passed through to run_ivector_common.sh -train_stage=-10 -splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0" -remove_egs=true -relu_dim=850 -num_epochs=3 - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat </dev/null - for dset in dev test; do - ( - steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ - --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ - ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 - steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ - data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 - ) || touch $dir/.error & - done - wait - [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 -fi - - -exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh new file mode 120000 index 00000000000..61f8f499182 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/run_tdnn.sh @@ -0,0 +1 @@ +tuning/run_tdnn_1b.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm.sh new file mode 120000 index 00000000000..8e647598556 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1a.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_disc.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_disc.sh new file mode 120000 index 00000000000..50d28fb91f3 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_disc.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_1a_disc.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_lfr.sh b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_lfr.sh new file mode 120000 index 00000000000..8e03c924bc1 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/run_tdnn_lstm_lfr.sh @@ -0,0 +1 @@ +tuning/run_tdnn_lstm_lfr_1a.sh \ No newline at end of file diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1a.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1a.sh new file mode 100755 index 00000000000..80ff91b8606 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1a.sh @@ -0,0 +1,120 @@ +#!/bin/bash + +# This is the standard "tdnn" system, built in nnet3; this script +# 
is the version that's meant to run with data-cleanup, that doesn't +# support parallel alignments. + + +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn1a_sp exp/nnet3_cleaned/tdnn1b_sp +# System tdnn1a_sp tdnn1b_sp +# WER on dev(orig) 11.9 11.7 +# WER on dev(rescored) 11.2 10.9 +# WER on test(orig) 11.6 11.7 +# WER on test(rescored) 11.0 11.0 +# Final train prob -0.9255 -0.9416 +# Final valid prob -1.1842 -1.1496 +# Final train acc 0.7245 0.7241 +# Final valid acc 0.6771 0.6788 + + +# by default, with cleanup: +# local/nnet3/run_tdnn.sh + +# without cleanup: +# local/nnet3/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix=1a #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. + +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +splice_indexes="-2,-1,0,1,2 -1,2 -3,3 -7,2 -3,3 0 0" +remove_egs=true +relu_dim=850 +num_epochs=3 + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat </dev/null + for dset in dev test; do + ( + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1b.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1b.sh new file mode 100755 index 00000000000..f6e4fb71b75 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1b.sh @@ -0,0 +1,172 @@ +#!/bin/bash + + +# 1b is as 1a but uses xconfigs. + +# This is the standard "tdnn" system, built in nnet3; this script +# is the version that's meant to run with data-cleanup, that doesn't +# support parallel alignments. + + +# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn1b_sp +# exp/nnet3_cleaned/tdnn1b_sp: num-iters=240 nj=2..12 num-params=10.3M dim=40+100->4187 combine=-0.95->-0.95 loglike:train/valid[159,239,combined]=(-1.01,-0.95,-0.94/-1.18,-1.16,-1.15) accuracy:train/valid[159,239,combined]=(0.71,0.72,0.72/0.67,0.68,0.68) + +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn1a_sp exp/nnet3_cleaned/tdnn1b_sp +# System tdnn1a_sp tdnn1b_sp +# WER on dev(orig) 11.9 11.7 +# WER on dev(rescored) 11.2 10.9 +# WER on test(orig) 11.6 11.7 +# WER on test(rescored) 11.0 11.0 +# Final train prob -0.9255 -0.9416 +# Final valid prob -1.1842 -1.1496 +# Final train acc 0.7245 0.7241 +# Final valid acc 0.6771 0.6788 + + +# by default, with cleanup: +# local/nnet3/run_tdnn.sh + +# without cleanup: +# local/nnet3/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). 
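+# Note: all of the variables set below can be overridden from the command line
+# thanks to utils/parse_options.sh; e.g. to resume an interrupted run you might
+# do something like (hypothetical stage values):
+#   local/nnet3/run_tdnn.sh --stage 13 --train-stage 50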
+stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix=1b #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. + +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +remove_egs=true +relu_dim=850 +srand=0 +reporting_email=dpovey@gmail.com +# set common_egs_dir to use previously dumped egs. +common_egs_dir= + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=850 + relu-renorm-layer name=tdnn2 dim=850 input=Append(-1,2) + relu-renorm-layer name=tdnn3 dim=850 input=Append(-3,3) + relu-renorm-layer name=tdnn4 dim=850 input=Append(-7,2) + relu-renorm-layer name=tdnn5 dim=850 input=Append(-3,3) + relu-renorm-layer name=tdnn6 dim=850 + output-layer name=output dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=3 \ + --trainer.samples-per-iter=400000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=12 \ + --trainer.optimization.initial-effective-lrate=0.0015 \ + --trainer.optimization.final-effective-lrate=0.00015 \ + --trainer.optimization.minibatch-size=256,128 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # note: for TDNNs, looped decoding gives exactly the same results + # as regular decoding, so there is no point in testing it separately. + # We use regular decoding because it supports multi-threaded (we just + # didn't create the binary for that, for looped decoding, so far). 
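+  # (the reason they are identical is that a TDNN has no recurrent state, so
+  # splitting the input into chunks cannot change the network's outputs.)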
+ rm $dir/.error || true 2>/dev/null + for dset in dev test; do + ( + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1c.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1c.sh new file mode 100755 index 00000000000..35789342ffb --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_1c.sh @@ -0,0 +1,186 @@ +#!/bin/bash + +# 1c is as 1b but using more 'chain-like' splicing and slightly +# smaller dim. Not better; maybe slightly worse. + +# note: the num-params is almost the same. +# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn1{b,c}_sp +# exp/nnet3_cleaned/tdnn1b_sp: num-iters=240 nj=2..12 num-params=10.3M dim=40+100->4187 combine=-0.95->-0.95 loglike:train/valid[159,239,combined]=(-1.01,-0.95,-0.94/-1.18,-1.16,-1.15) accuracy:train/valid[159,239,combined]=(0.71,0.72,0.72/0.67,0.68,0.68) +# exp/nnet3_cleaned/tdnn1c_sp: num-iters=240 nj=2..12 num-params=10.1M dim=40+100->4187 combine=-1.16->-1.15 loglike:train/valid[159,239,combined]=(-1.22,-1.16,-1.15/-1.41,-1.38,-1.38) accuracy:train/valid[159,239,combined]=(0.66,0.67,0.68/0.62,0.63,0.63) + +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn1{b,c}_sp +# System tdnn1b_sp tdnn1c_sp +# WER on dev(orig) 11.7 11.9 +# WER on dev(rescored) 10.9 11.1 +# WER on test(orig) 11.7 11.8 +# WER on test(rescored) 11.0 11.2 +# Final train prob -0.9416 -1.1505 +# Final valid prob -1.1496 -1.3805 +# Final train acc 0.7241 0.6756 +# Final valid acc 0.6788 0.6255 + +# This is the standard "tdnn" system, built in nnet3; this script +# is the version that's meant to run with data-cleanup, that doesn't +# support parallel alignments. + + +# steps/info/nnet3_dir_info.pl exp/nnet3_cleaned/tdnn1b_sp +# exp/nnet3_cleaned/tdnn1b_sp: num-iters=240 nj=2..12 num-params=10.3M dim=40+100->4187 combine=-0.95->-0.95 loglike:train/valid[159,239,combined]=(-1.01,-0.95,-0.94/-1.18,-1.16,-1.15) accuracy:train/valid[159,239,combined]=(0.71,0.72,0.72/0.67,0.68,0.68) + +# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn1a_sp exp/nnet3_cleaned/tdnn1b_sp +# System tdnn1a_sp tdnn1b_sp +# WER on dev(orig) 11.9 11.7 +# WER on dev(rescored) 11.2 10.9 +# WER on test(orig) 11.6 11.7 +# WER on test(rescored) 11.0 11.0 +# Final train prob -0.9255 -0.9416 +# Final valid prob -1.1842 -1.1496 +# Final train acc 0.7245 0.7241 +# Final valid acc 0.6771 0.6788 + + +# by default, with cleanup: +# local/nnet3/run_tdnn.sh + +# without cleanup: +# local/nnet3/run_tdnn.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix=1c #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. 
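+# (together with $nnet3_affix this determines the experiment directory name,
+# e.g. exp/nnet3_cleaned/tdnn1c_sp in the results quoted at the top of this
+# script.)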
+ +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +remove_egs=true +srand=0 +reporting_email=dpovey@gmail.com +# set common_egs_dir to use previously dumped egs. +common_egs_dir= + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=750 + relu-renorm-layer name=tdnn2 dim=750 input=Append(-1,0,1) + relu-renorm-layer name=tdnn3 dim=750 input=Append(-1,0,1) + relu-renorm-layer name=tdnn4 dim=750 input=Append(-3,0,3) + relu-renorm-layer name=tdnn5 dim=750 input=Append(-6,-3,0) + output-layer name=output dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=3 \ + --trainer.samples-per-iter=400000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=12 \ + --trainer.optimization.initial-effective-lrate=0.0015 \ + --trainer.optimization.final-effective-lrate=0.00015 \ + --trainer.optimization.minibatch-size=256,128 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + # note: for TDNNs, looped decoding gives exactly the same results + # as regular decoding, so there is no point in testing it separately. + # We use regular decoding because it supports multi-threaded (we just + # didn't create the binary for that, for looped decoding, so far). 
+ rm $dir/.error || true 2>/dev/null + for dset in dev test; do + ( + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lfr_1a.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lfr_1a.sh new file mode 100755 index 00000000000..666c2f1bb31 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lfr_1a.sh @@ -0,0 +1,200 @@ +#!/bin/bash + + +# run_tdnn_lfr_1a.sh is similar in configuration to run_tdnn_1c.sh, but it's a +# low-frame-rate system (see egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c.sh +# for an example of such a system). + + +# by default, with cleanup: +# local/nnet3/run_tdnn_lfr.sh + +# without cleanup: +# local/nnet3/run_tdnn_lfr.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned +tdnn_affix=1a #affix for TDNN directory e.g. "a" or "b", in case we change the configuration. + +# Options which are not passed through to run_ivector_common.sh +train_stage=-10 +remove_egs=true +srand=0 +reporting_email=dpovey@gmail.com +# set common_egs_dir to use previously dumped egs. +common_egs_dir= + +. cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! cuda-compiled; then + cat <$lang/topo +fi + +if [ $stage -le 13 ]; then + # Build a tree using our new topology and a reduced sampling rate. + # We use 4000 leaves, which is a little less than the number used + # in the baseline GMM system (5k) in this setup, since generally + # LFR systems do best with somewhat fewer leaves. + # + # To get the stats to build the tree this script only uses every third frame, + # but it dumps converted alignments that essentially have 3 different + # frame-shifted versions of the alignment interpolated together; these can be + # used without modification in getting labels for training. 
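+  # (--frame-subsampling-factor 3 is what makes this a low-frame-rate tree, and
+  # --repeat-frames true is what produces the interleaved, full-frame-rate
+  # converted alignments described above, which are used directly as the
+  # targets for steps/nnet3/train_dnn.py below.)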
+ steps/nnet3/chain/build_tree.sh \ + --repeat-frames true --frame-subsampling-factor 3 \ + --cmd "$train_cmd" 4000 data/${train_set}_sp_comb \ + $lang $ali_dir $treedir +fi + +if [ $stage -le 14 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=750 + relu-renorm-layer name=tdnn2 dim=750 input=Append(-1,0,1) + relu-renorm-layer name=tdnn3 dim=750 input=Append(-1,0,1) + relu-renorm-layer name=tdnn4 dim=750 input=Append(-3,0,3) + relu-renorm-layer name=tdnn5 dim=750 input=Append(-6,-3,0) + output-layer name=output dim=$num_targets max-change=1.5 +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + + +if [ $stage -le 15 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_dnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=3 \ + --trainer.samples-per-iter=400000 \ + --trainer.optimization.num-jobs-initial=2 \ + --trainer.optimization.num-jobs-final=12 \ + --trainer.optimization.initial-effective-lrate=0.0015 \ + --trainer.optimization.final-effective-lrate=0.00015 \ + --trainer.optimization.minibatch-size=256,128 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$treedir \ + --lang=$lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; + echo 3 >$dir/frame_subsampling_factor +fi + +if [ $stage -le 16 ]; then + # The reason we are using data/lang here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + + utils/lang/check_phones_compatible.sh data/lang/phones.txt $lang/phones.txt + utils/mkgraph.sh --self-loop-scale 0.333 data/lang $dir $dir/graph +fi + + +if [ $stage -le 17 ]; then + # note: for TDNNs, looped decoding gives exactly the same results + # as regular decoding, so there is no point in testing it separately. + # We use regular decoding because it supports multi-threaded (we just + # didn't create the binary for that, for looped decoding, so far). 
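+  # We decode with --acwt 0.333 (the same scale given to mkgraph.sh via
+  # --self-loop-scale above) and then multiply the acoustic scores back up
+  # with --post-decode-acwt 3.0, so that the usual range of LM weights is
+  # appropriate when scoring the lattices.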
+ rm $dir/.error || true 2>/dev/null + for dset in dev test; do + ( + steps/nnet3/decode.sh --acwt 0.333 --post-decode-acwt 3.0 --nj $decode_nj \ + --cmd "$decode_cmd" --num-threads 4 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + $dir/graph data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a.sh new file mode 100755 index 00000000000..28c45836cf7 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a.sh @@ -0,0 +1,219 @@ +#!/bin/bash + +# this is a TDNN+LSTM system; the configuration is similar to +# local/chain/tuning/run_tdnn_lstm_1e.sh, but a non-chain nnet3 system, and +# with 1.5 times larger hidden dimensions. + + +# local/nnet3/compare_wer.sh --looped exp/nnet3_cleaned/tdnn_lstm1a_sp exp/nnet3_cleaned/tdnn_lstm1b_sp +# System tdnn_lstm1a_sp tdnn_lstm1b_sp +# WER on dev(orig) 11.0 11.0 +# [looped:] 11.0 11.1 +# WER on dev(rescored) 10.4 10.3 +# [looped:] 10.3 10.5 +# WER on test(orig) 10.7 10.6 +# [looped:] 10.7 10.7 +# WER on test(rescored) 10.1 9.9 +# [looped:] 10.0 10.0 +# Final train prob -0.6881 -0.6897 +# Final valid prob -0.7796 -0.7989 +# Final train acc 0.7954 0.7946 +# Final valid acc 0.7611 0.7582 + +# by default, with cleanup: +# local/nnet3/run_tdnn_lstm.sh + +# without cleanup: +# local/nnet3/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned + +# Options which are not passed through to run_ivector_common.sh +affix=1a +common_egs_dir= +reporting_email= + +# LSTM options +train_stage=-10 +label_delay=5 + +# training chunk-options +chunk_width=40,30,20 +chunk_left_context=40 +chunk_right_context=0 + +# training options +srand=0 +remove_egs=true + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=768 + relu-renorm-layer name=tdnn2 dim=768 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn3 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + relu-renorm-layer name=tdnn5 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 decay-time=20 delay=-3 + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=10000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=15 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ + --extra-left-context $chunk_left_context \ + --extra-right-context $chunk_right_context \ + --frames-per-chunk $frames_per_chunk \ + --extra-left-context-initial 0 --extra-right-context-final 0 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1 + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_${dset} 
${dir}/decode_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1 +fi + + +if [ $stage -le 15 ]; then + # 'looped' decoding. + # note: you should NOT do this decoding step for setups that have bidirectional + # recurrence, like BLSTMs-- it doesn't make sense and will give bd results. + # we didn't write a -parallel version of this program yet, + # so it will take a bit longer as the --num-threads option is not supported. + # we just hardcode the --frames-per-chunk option as it doesn't have to + # match any value used in training, and it won't affect the results (unlike + # regular decoding). + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \ + $graph_dir data/${dset}_hires $dir/decode_looped_${dset} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1 + ) || touch $dir/.error & + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a_disc.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a_disc.sh new file mode 100755 index 00000000000..1826caf3d05 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1a_disc.sh @@ -0,0 +1,246 @@ +#!/bin/bash + +# This script does discriminative training on top of CE nnet3 system. To +# simplify things, this assumes you are using the "cleaned" data (since this is +# generally better), i.e. it won't work if you used options to run_tdnn_lstm_1a.sh +# to use the non-cleaned data. +# +# note: this relies on having a cluster that has plenty of CPUs as well as GPUs, +# since the alignment and the lattice generation/egs-dumping takes quite a bit +# of CPU time. + +# below is with the current settings (effective_learning_rate=0.0000025, last_layer_factor=0.5): +# steps/info/nnet3_disc_dir_info.pl exp/nnet3_cleaned/tdnn_lstm1a_sp_smbrslow +# exp/nnet3_cleaned/tdnn_lstm1a_sp_smbrslow:num-jobs=4;effective-lrate=2.5e-06;last-layer-factor=0.50;iters-per-epoch=55;epoch[0,1,2,3]:train-objf=[0.94,0.96,0.97,0.97],valid-objf=[0.91,0.93,0.93,0.93],train-counts=[0.40,0.25,0.17,0.12],valid-counts=[0.57,0.31,0.34,0.35] + +# local/nnet3/compare_wer.sh --looped exp/nnet3_cleaned/tdnn_lstm1a_sp exp/nnet3_cleaned/tdnn_lstm1a_sp_smbrslow:{1,2,3} +# System tdnn_lstm1a_sp tdnn_lstm1a_sp_smbrslow:1 tdnn_lstm1a_sp_smbrslow:2 tdnn_lstm1a_sp_smbrslow:3 +# WER on dev(orig) 11.0 9.4 9.4 9.4 +# [looped:] 11.0 9.4 9.5 9.4 +# WER on dev(rescored) 10.3 8.8 8.7 8.7 +# [looped:] 10.3 8.8 8.9 8.9 +# WER on test(orig) 10.8 9.6 9.7 9.6 +# [looped:] 10.7 9.6 9.6 9.7 +# WER on test(rescored) 10.1 9.1 9.2 9.1 +# [looped:] 10.0 9.1 9.2 9.1 + +# Below is with twice the lrate (5e-06) and the same last-layer-factor (0.5). Trained too fast. +# exp/nnet3_cleaned/tdnn_lstm1a_sp_smbr:num-jobs=4;effective-lrate=5e-06;last-layer-factor=0.50;iters-per-epoch=55;epoch[0,1,2,3]:train-objf=[0.94,0.97,0.97,0.98],valid-objf=[0.91,0.93,0.93,0.93],train-counts=[0.40,0.22,0.12,0.09],valid-counts=[0.57,0.31,0.27,0.32] +# I'm not showing the looped decoding results with this older step; +# there was a script bug (now fixed) and I don't want to rerun them. 
+# local/nnet3/compare_wer.sh exp/nnet3_cleaned/tdnn_lstm1a_sp exp/nnet3_cleaned/tdnn_lstm1a_sp_smbr:{1,2,3} +# System tdnn_lstm1a_sp tdnn_lstm1a_sp_smbr:1 tdnn_lstm1a_sp_smbr:2 tdnn_lstm1a_sp_smbr:3 +# WER on dev(orig) 11.0 9.4 9.4 9.5 +# WER on dev(rescored) 10.3 8.8 8.8 8.9 +# WER on test(orig) 10.8 9.6 9.8 9.8 +# WER on test(rescored) 10.1 9.1 9.3 9.4 + +set -e +set -uo pipefail + +stage=1 +train_stage=-10 # can be used to start training in the middle. +get_egs_stage=0 +use_gpu=true # for training +cleanup=false # run with --cleanup true --stage 6 to clean up (remove large things like + # alignments and degs). +degs_dir= # set this to use preexisting degs. +nj=400 # have a high number of jobs because this could take a while, and we might + # have some stragglers. + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +graph_dir=exp/tri3_cleaned/graph +srcdir=exp/nnet3_cleaned/tdnn_lstm1a_sp +train_data_dir=data/train_cleaned_sp_hires_comb +online_ivector_dir=exp/nnet3_cleaned/ivectors_train_cleaned_sp_hires_comb + +## Objective options +criterion=smbr +one_silence_class=true + +# originally ran with effective_learning_rate=0.000005, +# changing to effective_learning_rate=0.0000025 and using affix=slow + +# you can set --disc-affix if you run different configurations. +disc_affix= + +dir=${srcdir}_${criterion}${disc_affix} + +## Egs options. Give quite a few choices of chunk length, +## so it can split utterances without much gap or overlap. +frames_per_eg=300,280,150,120,100 +frames_overlap_per_eg=0 +frames_per_chunk_egs=200 # for alignments and denlat creation. +frames_per_chunk_decoding=50 # for decoding; should be the same as the value + # used in the script that trained the nnet. + # We didn't set the frames_per_chunk in + # run_tdnn_lstm_1a.sh, so it defaults to 50. +## these context options should match the training condition. (chunk_left_context, +## chunk_right_context) +## We set --extra-left-context-initial 0 and --extra-right-context-final 0 +## directly in the script below, but this should also match the training condition. +## note: --extra-left-context should be the same as the chunk_left_context (or in +## general, the argument of --egs.chunk-left-context) in the baseline script. +extra_left_context=40 +extra_right_context=0 + + + +## Nnet training options +effective_learning_rate=0.0000025 +last_layer_factor=0.5 +max_param_change=1 +num_jobs_nnet=4 +num_epochs=3 +regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options, + # in chain models. +minibatch_size="300=32,16/150=64,32" # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up); + # if chunk size is closer to 150, use mini atch size of 64 (or 32 for mop-up). + + +## Decode options +decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more. + +if $use_gpu; then + if ! cuda-compiled; then + cat </dev/null || true + + for x in `seq $decode_start_epoch $num_epochs`; do + for decode_set in dev test; do + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + iter=epoch$x + # We don't test the iter "epoch${x}_adj", although it's computed, + # because prior-adjustment doesn't make sense for chain models + # and it degrades the results. 
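+      # (each epoch${x} model is written out by the discriminative training
+      # above; decoding all of them produces the per-epoch columns in the WER
+      # tables at the top of this script, which are generated with
+      # local/nnet3/compare_wer.sh using the dir:epoch syntax.)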
+ ( + steps/nnet3/decode_looped.sh \ + --nj $num_jobs --cmd "$decode_cmd" --iter $iter \ + --frames-per-chunk 30 \ + --online-ivector-dir exp/nnet3_cleaned/ivectors_${decode_set}_hires \ + --scoring-opts "--min-lmwt 5 " \ + $graph_dir data/${decode_set}_hires $dir/decode_looped_${decode_set}_${iter} || exit 1; + steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ + data/${decode_set}_hires \ + ${dir}/decode_looped_${decode_set}_${iter} ${dir}/decode_looped_${decode_set}_${iter}_rescore || exit 1 + ) || touch $dir/.error & + done + done + wait + if [ -f $dir/.error ]; then + echo "$0: something went wrong in decoding" + exit 1 + fi +fi + + + +if [ $stage -le 6 ] && $cleanup; then + # if you run with "--cleanup true --stage 6" you can clean up. + # actually, keep the alignments in case we need them later.. they're slow to + # create, and quite big. + # rm ${srcdir}_ali/ali.*.gz || true + + steps/nnet2/remove_egs.sh ${srcdir}_degs || true +fi + +exit 0; diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b.sh new file mode 100755 index 00000000000..8b8af6eff78 --- /dev/null +++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b.sh @@ -0,0 +1,240 @@ +#!/bin/bash + +# 1b is as 1a, but removing the decay-time option as a baseline. + +# the decay-time option does seem to be having the expected interaction with +# 'looped' decoding, i.e. with the decay-time option we don't get a degradation +# from looped decoding (if anything, with decay time, looped decoding is a +# little better than baseline decoding). + +# local/nnet3/compare_wer.sh --looped exp/nnet3_cleaned/tdnn_lstm1a_sp exp/nnet3_cleaned/tdnn_lstm1b_sp +# System tdnn_lstm1a_sp tdnn_lstm1b_sp +# WER on dev(orig) 11.0 11.0 +# [looped:] 11.0 11.1 +# WER on dev(rescored) 10.3 10.3 +# [looped:] 10.3 10.5 +# WER on test(orig) 10.8 10.6 +# [looped:] 10.7 10.7 +# WER on test(rescored) 10.1 9.9 +# [looped:] 10.0 10.0 +# Final train prob -0.6881 -0.6897 +# Final valid prob -0.7796 -0.7989 +# Final train acc 0.7954 0.7946 +# Final valid acc 0.7611 0.7582 + + + +# this is a TDNN+LSTM system; the configuration is similar to +# local/chain/tuning/run_tdnn_lstm_1e.sh, but a non-chain nnet3 system, and +# with 1.5 times larger hidden dimensions. + +# by default, with cleanup: +# local/nnet3/run_tdnn_lstm.sh + +# without cleanup: +# local/nnet3/run_tdnn_lstm.sh --train-set train --gmm tri3 --nnet3-affix "" & + + +set -e -o pipefail -u + +# First the options that are passed through to run_ivector_common.sh +# (some of which are also used in this script directly). +stage=0 +nj=30 +decode_nj=30 +min_seg_len=1.55 +train_set=train_cleaned +gmm=tri3_cleaned # this is the source gmm-dir for the data-type of interest; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix=_cleaned # cleanup affix for exp dirs, e.g. _cleaned + +# Options which are not passed through to run_ivector_common.sh +affix=1b +common_egs_dir= +reporting_email= + +# LSTM options +train_stage=-10 +label_delay=5 + +# training chunk-options +chunk_width=40,30,20 +chunk_left_context=40 +chunk_right_context=0 +# decode chunk-size options (for non-looped decoding) +extra_left_context=50 +extra_right_context=0 + +# training options +srand=0 +remove_egs=true + +#decode options +extra_left_context= +extra_right_context= +frames_per_chunk= + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + +if ! 
cuda-compiled; then + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-renorm-layer name=tdnn1 dim=768 + relu-renorm-layer name=tdnn2 dim=768 input=Append(-1,0,1) + fast-lstmp-layer name=lstm1 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 delay=-3 + relu-renorm-layer name=tdnn3 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn4 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm2 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 delay=-3 + relu-renorm-layer name=tdnn5 dim=768 input=Append(-3,0,3) + relu-renorm-layer name=tdnn6 dim=768 input=Append(-3,0,3) + fast-lstmp-layer name=lstm3 cell-dim=768 recurrent-projection-dim=192 non-recurrent-projection-dim=192 delay=-3 + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets max-change=1.5 + +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/train_rnn.py --stage=$train_stage \ + --cmd="$decode_cmd" \ + --feat.online-ivector-dir=$train_ivector_dir \ + --feat.cmvn-opts="--norm-means=false --norm-vars=false" \ + --trainer.srand=$srand \ + --trainer.max-param-change=2.0 \ + --trainer.num-epochs=6 \ + --trainer.deriv-truncate-margin=10 \ + --trainer.samples-per-iter=10000 \ + --trainer.optimization.num-jobs-initial=3 \ + --trainer.optimization.num-jobs-final=15 \ + --trainer.optimization.initial-effective-lrate=0.0003 \ + --trainer.optimization.final-effective-lrate=0.00003 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.rnn.num-chunk-per-minibatch=128,64 \ + --trainer.optimization.momentum=0.5 \ + --egs.chunk-width=$chunk_width \ + --egs.chunk-left-context=$chunk_left_context \ + --egs.chunk-right-context=$chunk_right_context \ + --egs.chunk-left-context-initial=0 \ + --egs.chunk-right-context-final=0 \ + --egs.dir="$common_egs_dir" \ + --cleanup.remove-egs=$remove_egs \ + --use-gpu=true \ + --feat-dir=$train_data_dir \ + --ali-dir=$ali_dir \ + --lang=data/lang \ + --reporting.email="$reporting_email" \ + --dir=$dir || exit 1; +fi + +if [ $stage -le 14 ]; then + [ -z $extra_left_context ] && extra_left_context=$chunk_left_context; + [ -z $extra_right_context ] && extra_right_context=$chunk_right_context; + [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width; + rm $dir/.error 2>/dev/null || true + for dset in dev test; do + ( + # caution: we don't set the --frames-per-chunk here, we just use the + # default value of 50, which happens to be suitable because it's + # close to the primary chunk_width of 40. 
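+      # (keeping the decode chunk size close to the training chunk width is
+      # preferable for recurrent nets, since the amount of temporal context the
+      # LSTM sees per chunk then roughly matches the training condition.)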
+if [ $stage -le 14 ]; then
+  [ -z $extra_left_context ] && extra_left_context=$chunk_left_context;
+  [ -z $extra_right_context ] && extra_right_context=$chunk_right_context;
+  [ -z $frames_per_chunk ] && frames_per_chunk=$chunk_width;
+  rm $dir/.error 2>/dev/null || true
+  for dset in dev test; do
+    (
+      # caution: we don't set --frames-per-chunk here; we just use the
+      # default value of 50, which happens to be suitable because it's
+      # close to the primary chunk_width of 40.
+      steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \
+        --extra-left-context $extra_left_context \
+        --extra-right-context $extra_right_context \
+        --extra-left-context-initial 0 --extra-right-context-final 0 \
+        --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+        ${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1
+      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+        data/${dset}_hires ${dir}/decode_${dset} ${dir}/decode_${dset}_rescore || exit 1
+    ) || touch $dir/.error &
+  done
+  wait
+  [ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+fi
+
+
+if [ $stage -le 15 ]; then
+  # 'looped' decoding.
+  # Note: you should NOT do this decoding step for setups that have bidirectional
+  # recurrence, like BLSTMs -- it doesn't make sense and will give bad results.
+  # We didn't write a -parallel version of this program yet, so it will take a
+  # bit longer, as the --num-threads option is not supported.
+  # We just hardcode the --frames-per-chunk option, as it doesn't have to
+  # match any value used in training, and it won't affect the results (unlike
+  # regular decoding).
+  rm $dir/.error 2>/dev/null || true
+  for dset in dev test; do
+    (
+      steps/nnet3/decode_looped.sh --nj $decode_nj --cmd "$decode_cmd" \
+        --frames-per-chunk 30 \
+        --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
+        $graph_dir data/${dset}_hires $dir/decode_looped_${dset} || exit 1;
+      steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \
+        data/${dset}_hires ${dir}/decode_looped_${dset} ${dir}/decode_looped_${dset}_rescore || exit 1
+    ) || touch $dir/.error &
+  done
+  wait
+  if [ -f $dir/.error ]; then
+    echo "$0: something went wrong in decoding"
+    exit 1
+  fi
+fi
+
+
+exit 0;
diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh
new file mode 100755
index 00000000000..07c3d4af233
--- /dev/null
+++ b/egs/tedlium/s5_r2/local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh
@@ -0,0 +1,185 @@
+#!/bin/bash
+
+# This script does discriminative training on top of a cross-entropy (CE)
+# nnet3 system.  To simplify things, it assumes you are using the "cleaned"
+# data (since this is generally better), i.e. it won't work if you used
+# options to run_tdnn_lstm_1b.sh to use the non-cleaned data.
+#
+# Note: this relies on having a cluster with plenty of CPUs as well as GPUs,
+# since the alignment and the lattice generation/egs-dumping take quite a bit
+# of CPU time.
+
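+# For reference, the assumed workflow is (a sketch; directory names follow the
+# defaults set below):
+#
+#   local/nnet3/tuning/run_tdnn_lstm_1b.sh       # CE training -> exp/nnet3_cleaned/tdnn_lstm1b_sp
+#   local/nnet3/tuning/run_tdnn_lstm_1b_disc.sh  # this script: sMBR training on top of it
+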
+
+set -e
+set -uo pipefail
+
+stage=1
+train_stage=-10   # can be used to start training in the middle.
+get_egs_stage=0
+use_gpu=true      # for training
+cleanup=false     # run with --cleanup true --stage 6 to clean up (remove large
+                  # things like alignments and degs).
+degs_dir=         # set this to use preexisting degs.
+nj=400   # have a high number of jobs because this could take a while, and we
+         # might have some stragglers.
+
+. ./cmd.sh
+. ./path.sh
+. ./utils/parse_options.sh
+
+graph_dir=exp/tri3_cleaned/graph
+srcdir=exp/nnet3_cleaned/tdnn_lstm1b_sp
+train_data_dir=data/train_cleaned_sp_hires_comb
+online_ivector_dir=exp/nnet3_cleaned/ivectors_train_cleaned_sp_hires_comb
+
+## Objective options
+criterion=smbr
+one_silence_class=true
+
+# You can set --disc-affix if you run different configurations, e.g. --disc-affix "_b".
+# Note: I ran without an affix with learning rate 0.0000125, with disc_affix=slow
+# with learning rate 0.000005, and with disc_affix=slow2 with learning rate 0.0000025.
+# disc_affix=slow3 is with effective_learning_rate=0.000005 and last_layer_factor=0.1.
+
+disc_affix=slow3
+
+dir=${srcdir}_${criterion}${disc_affix}
+
+## Egs options.  Give quite a few choices of chunk length,
+## so it can split utterances without much gap or overlap.
+frames_per_eg=300,280,150,120,100
+frames_overlap_per_eg=0
+frames_per_chunk_egs=200       # for alignments and denlat creation.
+frames_per_chunk_decoding=50   # for decoding; should be the same as the value
+                               # used in the script that trained the nnet.
+                               # We didn't set the frames_per_chunk in
+                               # run_tdnn_lstm_1b.sh, so it defaults to 50.
+## These context options should match the training condition (chunk_left_context,
+## chunk_right_context).
+## We set --extra-left-context-initial 0 and --extra-right-context-final 0
+## directly in the script below, but this should also match the training condition.
+## Note: --extra-left-context should be the same as the chunk_left_context (or in
+## general, the argument of --egs.chunk-left-context) in the baseline script.
+extra_left_context=40
+extra_right_context=0
+
+
+## Nnet training options
+effective_learning_rate=0.000005
+last_layer_factor=0.1
+max_param_change=1
+num_jobs_nnet=4
+num_epochs=2
+regularization_opts=     # applicable for providing --xent-regularize and
+                         # --l2-regularize options, in chain models.
+minibatch_size="300=32,16/150=64,32"  # the rule is: if the chunk size is closer to 300,
+                                      # use minibatch size 32 (or 16 for mop-up);
+                                      # if the chunk size is closer to 150, use
+                                      # minibatch size 64 (or 32 for mop-up).
+
+
+## Decode options
+decode_start_epoch=1  # can be used to avoid decoding all epochs, e.g. if we decided to run more.
+
+if $use_gpu; then
+  if ! cuda-compiled; then
+    cat <