
Commit

* 'master' of https://github.com/kaldi-asr/kaldi:
  [build] Bump OpenFst version to v1.6.2 (kaldi-asr#1492)
  [src] nnet1: fixing issue in multi-task training (kaldi-asr#1491)
  [egs] Fixes to URLs in vystadial example script.
  [egs] Minor cosmetic changes in voxforge example script (kaldi-asr#1483)
  [scripts,egs] sMBR on LFR xent system with shifted feats (kaldi-asr#1477)
  [src] bug-fix in gst plugin code (issue on g++ 5.4.0) (kaldi-asr#1479)
kronos-cm committed Mar 16, 2017
2 parents 8ed0c17 + eba49a0 commit c8fc981
Showing 12 changed files with 315 additions and 16 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -94,6 +94,8 @@ GSYMS
/tools/openfst-1.6.0/
/tools/openfst-1.6.1.tar.gz
/tools/openfst-1.6.1/
/tools/openfst-1.6.2.tar.gz
/tools/openfst-1.6.2/
/tools/pa_stable_v19_20111121.tgz
/tools/portaudio/
/tools/sctk-2.4.0-20091110-0958.tar.bz2
@@ -131,4 +133,3 @@ GSYMS
/tools/sequitur-g2p/

/kaldiwin_vs*

1 change: 1 addition & 0 deletions egs/swbd/s5c/local/nnet3/run_tdnn_lfr_disc.sh
210 changes: 210 additions & 0 deletions egs/swbd/s5c/local/nnet3/tuning/run_tdnn_lfr1c_disc.sh
@@ -0,0 +1,210 @@
#!/bin/bash

# This script does discriminative training on top of the CE nnet3 LFR system
# from run_tdnn_lfr1c.  To simplify things, this assumes you are using the
# "speed-perturbed" data (--speed_perturb true, which is the default) in the
# baseline run_tdnn_d.sh script.
#
# Note: this relies on having a cluster with plenty of CPUs as well as GPUs,
# since the lattice generation runs at about real-time, so it takes on the
# order of 1000 hours of CPU time.

# Comparing effect of shift:
# System tdnn_lfr1c_sp_smbr:1 tdnn_lfr1c_sp_smbr:2 tdnn_lfr1c_sp_smbr:3 tdnn_lfr1c_sp_fs_smbr:1 tdnn_lfr1c_sp_fs_smbr:2 tdnn_lfr1c_sp_fs_smbr:3
# WER on train_dev(tg) 16.26 16.11 16.02 16.02 15.77 15.78
# WER on train_dev(fg) 15.01 14.91 14.80 14.79 14.58 14.50
# WER on eval2000(tg) 18.9 18.7 18.6 18.6 18.5 18.5
# WER on eval2000(fg) 17.4 17.2 17.1 17.1 17.0 16.9


set -e
set -uo pipefail

stage=0
train_stage=-10 # can be used to start training in the middle.
get_egs_stage=0
use_gpu=true # for training
cleanup=false # run with --cleanup true --stage 5 to clean up (remove large things like
              # alignments and degs).
degs_dir= # set this to use preexisting degs.
nj=65 # have a high number of jobs because this could take a while, and we might
# have some stragglers.

## Objective options
criterion=smbr
one_silence_class=true

# you can set --disc-affix if you run different configurations, e.g. --disc-affix "_b"
# originally ran with no affix, with effective_learning_rate=0.0000125;
# reran by mistake with no affix with effective_learning_rate=0.000005 [was a bit
# better, see NOTES, but still best after 1st epoch].
# reran again with affix=slow and effective_learning_rate=0.0000025
# reran again with affix=slow2 and effective_learning_rate=0.00000125 (this was
# about the best).
# before checking in the script, removed the slow2 affix but left it with
# the lowest learning rate.
disc_affix=

## Egs options. Give quite a few choices of chunk length,
## so it can split utterances without much gap or overlap.
frames_per_eg=300,280,150,120,100
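# e.g. a 400-frame utterance can be covered exactly by one 300-frame chunk
# plus one 100-frame chunk, with no gap or overlap.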
frames_overlap_per_eg=0
frames_per_chunk_decoding=200
## These context options should match the training condition (chunk_left_context,
## chunk_right_context).
## We set --extra-left-context-initial 0 and --extra-right-context-final 0
## directly in the script below, but this should also match the training condition.
## Note: extra-left-context and extra-right-context are 0 because this is a TDNN,
## not a recurrent model like an LSTM or BLSTM.
extra_left_context=0
extra_right_context=0


## Nnet training options
effective_learning_rate=0.00000125
max_param_change=1
num_jobs_nnet=4
num_epochs=3
regularization_opts= # applicable for providing the --xent-regularize and --l2-regularize
                     # options in chain models.
minibatch_size="300=32,16/150=64,32" # rule says: if chunk size is closer to 300, use minibatch size 32 (or 16 for mop-up);
                                     # if chunk size is closer to 150, use a minibatch size of 64 (or 32 for mop-up).
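# e.g. under this rule a 280-frame chunk (closest to 300) goes into minibatches
# of 32 (16 for the mop-up minibatch), while a 120-frame chunk (closest to 150)
# goes into minibatches of 64 (32 for mop-up).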
shift_feats=false

## Decode options
decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more.


. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

srcdir=exp/nnet3/tdnn_lfr1c_sp
graph_dir=$srcdir/graph_sw1_tg
train_data_dir=data/train_nodup_sp_hires
online_ivector_dir=exp/nnet3/ivectors_train_nodup_sp
dir=${srcdir}_${criterion}${disc_affix}


if $use_gpu; then
if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.  Otherwise, call this script with --use-gpu false.
EOF
fi
num_threads=1
else
# Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
# almost the same, but this may be a little slower.
num_threads=16
fi

if [ ! -f ${srcdir}/final.mdl ]; then
echo "$0: expected ${srcdir}/final.mdl to exist"
exit 1;
fi


frame_subsampling_factor=1
if [ -f $srcdir/frame_subsampling_factor ]; then
frame_subsampling_factor=$(cat $srcdir/frame_subsampling_factor)
fi

affix= # Will be set if doing input frame shift
if [[ "$shift_feats" = true && $frame_subsampling_factor -ne 1 ]]; then
if [ $stage -le 0 ]; then
utils/data/shift_and_combine_feats.sh --write-utt2orig $dir/utt2orig \
$frame_subsampling_factor $train_data_dir ${train_data_dir}_fs
steps/online/nnet2/copy_ivector_dir.sh --utt2orig $dir/utt2orig \
$online_ivector_dir ${online_ivector_dir}_fs
rm $dir/utt2orig
fi
online_ivector_dir=${online_ivector_dir}_fs
train_data_dir=${train_data_dir}_fs
affix=_fs
fi
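# At this point, if shifting was done with frame_subsampling_factor=3, the
# _fs data holds the original utterances plus copies shifted by -1 and +1
# frames (see utils/data/shift_and_combine_feats.sh), and each shifted copy
# points at the iVectors of its original utterance.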

if [ $stage -le 1 ]; then
# hardcode no-GPU for alignment, although you could use a GPU [you wouldn't
# get good GPU utilization though].
steps/nnet3/align.sh --cmd "$decode_cmd" --use-gpu false \
--scale-opts '--transition-scale=1.0 --acoustic-scale=0.333 --self-loop-scale=0.333' \
--frames-per-chunk $frames_per_chunk_decoding \
--extra-left-context $extra_left_context --extra-right-context $extra_right_context \
--extra-left-context-initial 0 --extra-right-context-final 0 \
--online-ivector-dir $online_ivector_dir \
--nj $nj $train_data_dir data/lang $srcdir ${srcdir}_ali${affix} ;
fi


if [ -z "$degs_dir" ]; then

if [ $stage -le 2 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${srcdir}_degs/storage ]; then
utils/create_split_dir.pl \
/export/b{09,10,11,12}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5/${srcdir}_degs/storage ${srcdir}_degs/storage
fi
if [ -d ${srcdir}_degs/storage ]; then max_copy_jobs=10; else max_copy_jobs=5; fi

steps/nnet3/get_degs.sh \
--cmd "$decode_cmd --mem 10G" --num-threads 3 \
--self-loop-scale 0.333 --acwt 0.333 \
--max-copy-jobs $max_copy_jobs \
--extra-left-context $extra_left_context \
--extra-right-context $extra_right_context \
--extra-left-context-initial 0 --extra-right-context-final 0 \
--frames-per-chunk-decoding "$frames_per_chunk_decoding" \
--stage $get_egs_stage \
--online-ivector-dir $online_ivector_dir \
--frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \
$train_data_dir data/lang ${srcdir} ${srcdir}_ali${affix} ${srcdir}_degs${affix} || exit 1
fi
fi

if [ $stage -le 3 ]; then
[ -z "$degs_dir" ] && degs_dir=${srcdir}_degs${affix}
steps/nnet3/train_discriminative.sh --cmd "$decode_cmd" \
--stage $train_stage \
--acoustic-scale 0.333 \
--effective-lrate $effective_learning_rate --max-param-change $max_param_change \
--criterion $criterion --drop-frames true \
--num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size "$minibatch_size" \
--num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \
--regularization-opts "$regularization_opts" \
${degs_dir} $dir
fi

if [ $stage -le 4 ]; then
for x in `seq $decode_start_epoch $num_epochs`; do
for decode_set in train_dev eval2000; do
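# one decoding job per speaker (field 2 of utt2spk is the speaker id)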
num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
for iter in epoch$x epoch${x}_adj; do
(
steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \
--acwt 0.333 --post-decode-acwt 3.0 \
--online-ivector-dir exp/nnet3/ivectors_${decode_set} \
$graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_sw1_tg_${iter} || exit 1;

steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \
$dir/decode_${decode_set}_sw1_{tg,fsh_fg}_${iter} || exit 1;
) &
done
done
done
fi
wait;

if [ $stage -le 5 ] && $cleanup; then
# if you run with "--cleanup true --stage 5" you can clean up.
# actually, keep the alignments in case we need them later... they're slow to
# create, and quite big.
# rm ${srcdir}_ali/ali.*.gz || true

steps/nnet2/remove_egs.sh ${srcdir}_degs || true
fi

wait;
exit 0;
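
# For reference, a hypothetical re-run that skips alignment and degs dumping by
# pointing at previously dumped degs (the degs path here is illustrative):
#   local/nnet3/tuning/run_tdnn_lfr1c_disc.sh --stage 3 \
#     --degs-dir exp/nnet3/tdnn_lfr1c_sp_degs_fs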
2 changes: 1 addition & 1 deletion egs/voxforge/s5/path.sh
@@ -5,7 +5,7 @@ export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
. $KALDI_ROOT/tools/config/common_path.sh

# VoxForge data will be stored in:
export DATA_ROOT="/home/dpovey/kaldi-clean/egs/voxforge/s5/voxforge" # e.g. something like /media/secondary/voxforge
# export DATA_ROOT="$KALDI_ROOT/egs/voxforge/s5/voxforge" # e.g. something like /media/secondary/voxforge

if [ -z $DATA_ROOT ]; then
echo "You need to set \"DATA_ROOT\" variable in path.sh to point to the directory to host VoxForge's data"
5 changes: 3 additions & 2 deletions egs/voxforge/s5/run.sh
@@ -5,8 +5,9 @@

# NOTE: You will want to download the data set first, before executing this script.
# This can be done for example by:
# 1. Setting the DATA_ROOT variable to point to a directory with enough free
# space (at least 20-25GB currently (Feb 2014))
# 1. Setting the variable DATA_ROOT in path.sh to point to a
# directory with enough free space (at least 20-25GB
# currently (Feb 2014))
# 2. Running "getdata.sh"

# The second part of this script comes mostly from egs/rm/s5/run.sh
4 changes: 2 additions & 2 deletions egs/vystadial_cz/online_demo/Makefile
@@ -1,6 +1,6 @@
BEST_LINE=18
MODEL_PREFIX_URL=http://vystadial.ms.mff.cuni.cz/download/kaldi/src/pykaldi/pykaldi/binutils/
DATA_PREFIX_URL=http://vystadial.ms.mff.cuni.cz/download/kaldi/src/pykaldi/pykaldi/binutils/
MODEL_PREFIX_URL=https://vystadial.ms.mff.cuni.cz/download/pykaldi/egs/vystadial/online_demo
DATA_PREFIX_URL=https://vystadial.ms.mff.cuni.cz/download/pykaldi/egs/vystadial/online_demo

# Czech language models
LANG=cs
48 changes: 48 additions & 0 deletions egs/wsj/s5/steps/online/nnet2/copy_ivector_dir.sh
@@ -0,0 +1,48 @@
#!/bin/bash

# Copyright 2017 Johns Hopkins University (author: Hossein Hadian)
# Apache 2.0

# This script copies the necessary parts of an online ivector directory,
# optionally applying a mapping to the ivector_online.scp file.

utt2orig=

. utils/parse_options.sh

if [ $# != 2 ]; then
echo "Usage: "
echo " $0 [options] <srcdir> <destdir>"
echo "e.g.:"
echo " $0 exp/nnet3/online_ivector_train exp/nnet3/online_ivector_train_fs"
echo "Options"
echo " --utt2orig=<file> # utterance id mapping to use"
exit 1;
fi


srcdir=$1
destdir=$2

if [ ! -f $srcdir/ivector_period ]; then
echo "$0: no such file $srcdir/ivector_period"
exit 1;
fi

if [ "$destdir" == "$srcdir" ]; then
echo "$0: this script requires <srcdir> and <destdir> to be different."
exit 1
fi

set -e;

mkdir -p $destdir
cp -r $srcdir/{conf,ivector_period} $destdir
if [ -z $utt2orig ]; then
cp $srcdir/ivector_online.scp $destdir
else
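# Each line of utt2orig is "<new-utt-id> <orig-utt-id>" (illustrative ids:
# "fs1-sw02001-A sw02001-A"); apply_map.pl -f 2 replaces the second field
# with that original utterance's ivector_online.scp entry.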
utils/apply_map.pl -f 2 $srcdir/ivector_online.scp < $utt2orig > $destdir/ivector_online.scp
fi
cp $srcdir/final.ie.id $destdir

echo "$0: Copied necessary parts of online ivector directory $srcdir to $destdir"
12 changes: 12 additions & 0 deletions egs/wsj/s5/utils/data/shift_and_combine_feats.sh
@@ -4,6 +4,11 @@

# Apache 2.0

write_utt2orig= # if provided, this script will write
# a mapping of shifted utterance ids
# to the original ones into the file
# specified by this option

echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. utils/parse_options.sh
@@ -34,11 +39,18 @@ if [ -f $destdir/feats.scp ]; then
exit 1
fi

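# The unshifted utterances map to themselves; the shifted copies created in
# the loop below get a "fs<shift>-" prefix and are appended to the same file.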
if [ ! -z $write_utt2orig ]; then
awk '{print $1 " " $1}' $srcdir/feats.scp >$write_utt2orig
fi

tmp_shift_destdirs=()
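# For frame_subsampling_factor f this enumerates the f shifts from -(f/2) to
# -(f/2)+f-1, e.g. -1, 0, 1 for f=3; shift 0 is skipped because the unshifted
# data is $srcdir itself, which combine_data.sh adds back below.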
for frame_shift in `seq $[-(frame_subsampling_factor/2)] $[-(frame_subsampling_factor/2) + frame_subsampling_factor - 1]`; do
if [ "$frame_shift" == 0 ]; then continue; fi
utils/data/shift_feats.sh $frame_shift $srcdir ${destdir}_fs$frame_shift || exit 1
tmp_shift_destdirs+=("${destdir}_fs$frame_shift")
if [ ! -z $write_utt2orig ]; then
awk -v prefix="fs$frame_shift-" '{printf("%s%s %s\n", prefix, $1, $1);}' $srcdir/feats.scp >>$write_utt2orig
fi
done
utils/data/combine_data.sh $destdir $srcdir ${tmp_shift_destdirs[@]} || exit 1
rm -r ${tmp_shift_destdirs[@]}
6 changes: 3 additions & 3 deletions src/gst-plugin/gst-online-gmm-decode-faster.cc
@@ -287,7 +287,7 @@ gst_online_gmm_decode_faster_init(GstOnlineGmmDecodeFaster * filter) {
std::vector<std::pair<std::string, SimpleOptions::OptionInfo> > option_info_list;
option_info_list = filter->simple_options_->GetOptionInfoList();
int32 i = 0;
for (vector<std::pair<std::string,
for (std::vector<std::pair<std::string,
SimpleOptions::OptionInfo> >::iterator dx = option_info_list.begin();
dx != option_info_list.end(); dx++) {
std::pair<std::string, SimpleOptions::OptionInfo> result = (*dx);
@@ -747,7 +747,7 @@ gst_online_gmm_decode_faster_loop(GstOnlineGmmDecodeFaster * filter) {
std::vector<int32> word_ids;
filter->decoder_->FinishTraceBack(filter->out_fst_);
fst::GetLinearSymbolSequence(*(filter->out_fst_),
static_cast<vector<int32> *>(0),
static_cast<std::vector<int32> *>(0),
&word_ids,
static_cast<LatticeArc::Weight*>(0));
gst_online_gmm_decode_faster_push_words(filter, filter->srcpad_, word_ids, filter->word_syms_, partial_res || word_ids.size());
@@ -758,7 +758,7 @@ gst_online_gmm_decode_faster_loop(GstOnlineGmmDecodeFaster * filter) {
std::vector<int32> word_ids;
if (filter->decoder_->PartialTraceback(filter->out_fst_)) {
fst::GetLinearSymbolSequence(*(filter->out_fst_),
static_cast<vector<int32> *>(0),
static_cast<std::vector<int32> *>(0),
&word_ids,
static_cast<LatticeArc::Weight*>(0));
gst_online_gmm_decode_faster_push_words(filter, filter->srcpad_, word_ids, filter->word_syms_, false);
