diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh index a01505be695..690ef82e287 100755 --- a/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir.sh @@ -9,31 +9,33 @@ set -o pipefail . path.sh -stage=0 -corruption_stage=-10 -corrupt_only=false - -# Data options +# The following are the main parameters to modify data_dir=data/train_si284 # Expecting whole data directory. -speed_perturb=true +vad_dir= # Output of prepare_unsad_data.sh. + # If provided, the speech labels and deriv weights will be + # copied into the output data directory. + num_data_reps=5 # Number of corrupted versions -snrs="20:10:15:5:0:-5" foreground_snrs="20:10:15:5:0:-5" background_snrs="20:10:15:5:2:0:-2:-5" -base_rirs=simulated -speeds="0.9 1.0 1.1" + +stage=0 # Parallel options -reco_nj=40 -cmd=queue.pl +nj=4 +cmd=run.pl # Options for feature extraction mfcc_config=conf/mfcc_hires_bp.conf feat_suffix=hires_bp -reco_vad_dir= # Output of prepare_unsad_data.sh. - # If provided, the speech labels and deriv weights will be - # copied into the output data directory. +# Data options +corrupt_only=false +speed_perturb=true +speeds="0.9 1.0 1.1" +resample_data_dir=false + + . 
utils/parse_options.sh @@ -45,16 +47,21 @@ fi data_id=`basename ${data_dir}` rvb_opts=() -if [ "$base_rirs" == "simulated" ]; then - # This is the config for the system using simulated RIRs and point-source noises - rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") - rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") - rvb_opts+=(--noise-set-parameters "0.1, RIRS_NOISES/pointsource_noises/background_noise_list") - rvb_opts+=(--noise-set-parameters "0.9, RIRS_NOISES/pointsource_noises/foreground_noise_list") -else - # This is the config for the JHU ASpIRE submission system - rvb_opts+=(--rir-set-parameters "1.0, RIRS_NOISES/real_rirs_isotropic_noises/rir_list") - rvb_opts+=(--noise-set-parameters RIRS_NOISES/real_rirs_isotropic_noises/noise_list) +# This is the config for the system using simulated RIRs and point-source noises +rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") +rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") +rvb_opts+=(--noise-set-parameters "0.1, RIRS_NOISES/pointsource_noises/background_noise_list") +rvb_opts+=(--noise-set-parameters "0.9, RIRS_NOISES/pointsource_noises/foreground_noise_list") + +if $resample_data_dir; then + sample_frequency=`cat $mfcc_config | perl -ne 'if (m/--sample-frequency=(\S+)/) { print $1; }'` + if [ -z "$sample_frequency" ]; then + sample_frequency=16000 + fi + + utils/data/resample_data_dir.sh $sample_frequency ${data_dir} || exit 1 + data_id=`basename ${data_dir}` + rvb_opts+=(--source-sampling-rate=$sample_frequency) fi corrupted_data_id=${data_id}_corrupted @@ -119,17 +126,17 @@ else fi if [ $stage -le 8 ]; then - if [ ! -z "$reco_vad_dir" ]; then - if [ ! -f $reco_vad_dir/speech_labels.scp ]; then - echo "$0: Could not find file $reco_vad_dir/speech_labels.scp" + if [ ! -z "$vad_dir" ]; then + if [ ! 
-f $vad_dir/speech_labels.scp ]; then + echo "$0: Could not find file $vad_dir/speech_labels.scp" exit 1 fi - cat $reco_vad_dir/speech_labels.scp | \ + cat $vad_dir/speech_labels.scp | \ steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ sort -k1,1 > ${corrupted_data_dir}/speech_labels.scp - cat $reco_vad_dir/deriv_weights.scp | \ + cat $vad_dir/deriv_weights.scp | \ steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ sort -k1,1 > ${corrupted_data_dir}/deriv_weights.scp fi diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh index 8a5a552b2ab..7e7d84c0010 100755 --- a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_music.sh @@ -13,21 +13,25 @@ set -o pipefail data_dir=data/train_si284 vad_dir= # Location of directory with VAD labels -num_data_reps=5 +num_data_reps=5 # Number of corrupted versions foreground_snrs="5:2:1:0:-2:-5:-10:-20" background_snrs="5:2:1:0:-2:-5:-10:-20" -cmd=run.pl +stage=0 + +# Parallel options nj=4 +cmd=run.pl -stage=0 +# Options for feature extraction mfcc_config=conf/mfcc_hires_bp.conf feat_suffix=hires_bp -dry_run=false # If true, exits after preparing the corrupted wav.scp +corrupt_only=false speed_perturb=true speeds="0.9 1.0 1.1" +resample_data_dir=false label_dir=music_labels # Directory to dump music labels @@ -70,6 +74,17 @@ for f in RIRS_NOISES/simulated_rirs/smallroom/rir_list \ echo "$0: Could not find $f" && exit 1 done +if $resample_data_dir; then + sample_frequency=`cat $mfcc_config | perl -ne 'if (m/--sample-frequency=(\S+)/) { print $1; }'` + if [ -z "$sample_frequency" ]; then + sample_frequency=16000 + fi + + utils/data/resample_data_dir.sh $sample_frequency ${data_dir} || exit 1 + data_id=`basename ${data_dir}` + rvb_opts+=(--source-sampling-rate=$sample_frequency) +fi + corrupted_data_id=${data_id}_music_corrupted 
orig_corrupted_data_id=$corrupted_data_id @@ -87,10 +102,6 @@ if [ $stage -le 1 ]; then data/${data_id} data/${corrupted_data_id} fi -if $dry_run; then - exit 0 -fi - corrupted_data_dir=data/${corrupted_data_id} # Data dir without speed perturbation orig_corrupted_data_dir=$corrupted_data_dir @@ -111,6 +122,11 @@ if $speed_perturb; then fi fi +if $corrupt_only; then + echo "$0: Got corrupted data directory in ${corrupted_data_dir}" + exit 0 +fi + mfccdir=`basename $mfcc_config` mfccdir=${mfccdir%%.conf} @@ -215,11 +231,13 @@ if [ $stage -le 7 ]; then ark:$music_dir/music_segmentation.JOB.ark \ ark,scp:$label_dir/music_labels_${corrupted_data_id}.JOB.ark,$label_dir/music_labels_${corrupted_data_id}.JOB.scp fi -fi -for n in `seq $nj`; do - cat $label_dir/music_labels_${corrupted_data_id}.$n.scp -done | utils/filter_scp.pl ${corrupted_data_dir}/utt2spk > ${corrupted_data_dir}/music_labels.scp + for n in `seq $nj`; do + cat $label_dir/music_labels_${corrupted_data_id}.$n.scp + done | \ + steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps "music" | \ + utils/filter_scp.pl ${corrupted_data_dir}/utt2spk > ${corrupted_data_dir}/music_labels.scp +fi if [ $stage -le 8 ]; then utils/split_data.sh --per-utt ${corrupted_data_dir} $nj diff --git a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_snr.sh b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_snr.sh index 19b4036c9aa..d98b98bd3ac 100755 --- a/egs/aspire/s5/local/segmentation/do_corruption_data_dir_snr.sh +++ b/egs/aspire/s5/local/segmentation/do_corruption_data_dir_snr.sh @@ -9,32 +9,33 @@ set -o pipefail . path.sh -stage=0 -corruption_stage=-10 -corrupt_only=false - -# Data options +# The following are the main parameters to modify data_dir=data/train_si284 # Expecting whole data directory. -speed_perturb=true +vad_dir= # Output of prepare_unsad_data.sh. + # If provided, the speech labels and deriv weights will be + # copied into the output data directory. 
+ num_data_reps=5 # Number of corrupted versions -snrs="20:10:15:5:0:-5" foreground_snrs="20:10:15:5:0:-5" background_snrs="20:10:15:5:2:0:-2:-5" -base_rirs=simulated -speeds="0.9 1.0 1.1" -resample_data_dir=false + +stage=0 # Parallel options -reco_nj=40 -cmd=queue.pl +nj=4 +cmd=run.pl # Options for feature extraction mfcc_config=conf/mfcc_hires_bp.conf feat_suffix=hires_bp -reco_vad_dir= # Output of prepare_unsad_data.sh. - # If provided, the speech labels and deriv weights will be - # copied into the output data directory. +# Data options +corrupt_only=false +speed_perturb=true +speeds="0.9 1.0 1.1" +resample_data_dir=false + + . utils/parse_options.sh @@ -45,19 +46,25 @@ fi data_id=`basename ${data_dir}` -rvb_opts=() -if [ "$base_rirs" == "simulated" ]; then - # This is the config for the system using simulated RIRs and point-source noises - rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") - rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") - rvb_opts+=(--noise-set-parameters "0.1, RIRS_NOISES/pointsource_noises/background_noise_list") - rvb_opts+=(--noise-set-parameters "0.9, RIRS_NOISES/pointsource_noises/foreground_noise_list") -else - # This is the config for the JHU ASpIRE submission system - rvb_opts+=(--rir-set-parameters "1.0, RIRS_NOISES/real_rirs_isotropic_noises/rir_list") - rvb_opts+=(--noise-set-parameters RIRS_NOISES/real_rirs_isotropic_noises/noise_list) +if [ ! 
-d RIRS_NOISES/ ]; then + # Prepare MUSAN rirs and noises + wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip + unzip rirs_noises.zip fi +rvb_opts=() +# This is the config for the system using simulated RIRs and point-source noises +rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list") +rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list") +rvb_opts+=(--noise-set-parameters "0.1, RIRS_NOISES/pointsource_noises/background_noise_list") +rvb_opts+=(--noise-set-parameters "0.9, RIRS_NOISES/pointsource_noises/foreground_noise_list") + +for f in RIRS_NOISES/simulated_rirs/smallroom/rir_list \ + RIRS_NOISES/simulated_rirs/mediumroom/rir_list \ + $data_dir/wav.scp; do + [ ! -f $f ] && echo "$0: Could not find $f" && exit 1 +done + if $resample_data_dir; then sample_frequency=`cat $mfcc_config | perl -ne 'if (m/--sample-frequency=(\S+)/) { print $1; }'` if [ -z "$sample_frequency" ]; then @@ -134,7 +141,7 @@ if [ $stage -le 4 ]; then utils/copy_data_dir.sh $corrupted_data_dir ${corrupted_data_dir}_$feat_suffix corrupted_data_dir=${corrupted_data_dir}_$feat_suffix steps/make_mfcc.sh --mfcc-config $mfcc_config \ - --cmd "$cmd" --nj $reco_nj \ + --cmd "$cmd" --nj $nj --write-utt2num-frames true \ $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir steps/compute_cmvn_stats.sh --fake \ $corrupted_data_dir exp/make_${feat_suffix}/${corrupted_data_id} $mfccdir @@ -202,7 +209,7 @@ if [ $stage -le 7 ]; then --cepstral-lifter=$cepstral_lifter \ exp/make_irm_targets/$corrupted_data_id/idct_matrix - # Get log-SNR targets + # Get log-IRM targets steps/segmentation/make_snr_targets.sh \ - --nj $reco_nj --cmd "$cmd" \ + --nj $nj --cmd "$cmd" \ --target-type Irm --compress false \ @@ -213,21 +220,21 @@ fi if [ $stage -le 8 ]; then - if [ ! -z "$reco_vad_dir" ]; then - if [ ! -f $reco_vad_dir/speech_labels.scp ]; then - echo "$0: Could not find file $reco_vad_dir/speech_labels.scp" + if [ ! 
-z "$vad_dir" ]; then + if [ ! -f $vad_dir/speech_labels.scp ]; then + echo "$0: Could not find file $vad_dir/speech_labels.scp" exit 1 fi - cat $reco_vad_dir/speech_labels.scp | \ + cat $vad_dir/speech_labels.scp | \ steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ sort -k1,1 > ${corrupted_data_dir}/speech_labels.scp - cat $reco_vad_dir/deriv_weights.scp | \ + cat $vad_dir/deriv_weights.scp | \ steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ sort -k1,1 > ${corrupted_data_dir}/deriv_weights.scp - cat $reco_vad_dir/deriv_weights_manual_seg.scp | \ + cat $vad_dir/deriv_weights_manual_seg.scp | \ steps/segmentation/get_reverb_scp.pl -f 1 $num_data_reps | \ sort -k1,1 > ${corrupted_data_dir}/deriv_weights_for_irm_targets.scp fi diff --git a/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh b/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh index cccc7e2db84..df40337e17e 100755 --- a/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh +++ b/egs/aspire/s5/local/segmentation/prepare_unsad_data.sh @@ -11,8 +11,8 @@ set -e stage=-2 cmd=queue.pl -reco_nj=40 -nj=100 +reco_nj=40 # Number of jobs to work at recording-level +nj=100 # Number of jobs to work at utterance-level # Options to be passed to get_sad_map.py map_noise_to_sil=true # Map noise phones to silence label (0) @@ -21,7 +21,8 @@ sad_map= # Initial mapping from phones to speech/non-speech labels. 
# Overrides the default mapping using phones/silence.txt # and phones/nonsilence.txt -# Options for feature extraction +# Options for feature extraction +# (These must match the features used for model_dir and sat_model_dir) feat_type=mfcc # mfcc or plp add_pitch=false # Add pitch features @@ -117,10 +118,10 @@ function make_mfcc { fi if $add_pitch; then - steps/make_mfcc_pitch.sh --cmd "$cmd" --nj $nj \ + steps/make_mfcc_pitch.sh --cmd "$cmd" --nj $nj --write-utt2num-frames true \ --mfcc-config $mfcc_config --pitch-config $pitch_config $1 $2 $3 || exit 1 else - steps/make_mfcc.sh --cmd "$cmd" --nj $nj \ + steps/make_mfcc.sh --cmd "$cmd" --nj $nj --write-utt2num-frames true \ --mfcc-config $mfcc_config $1 $2 $3 || exit 1 fi @@ -160,10 +161,10 @@ function make_plp { fi if $add_pitch; then - steps/make_plp_pitch.sh --cmd "$cmd" --nj $nj \ + steps/make_plp_pitch.sh --cmd "$cmd" --nj $nj --write-utt2num-frames true \ --plp-config $plp_config --pitch-config $pitch_config $1 $2 $3 || exit 1 else - steps/make_plp.sh --cmd "$cmd" --nj $nj \ + steps/make_plp.sh --cmd "$cmd" --nj $nj --write-utt2num-frames true \ --plp-config $plp_config $1 $2 $3 || exit 1 fi } @@ -177,15 +178,16 @@ data_id=$(basename $data_dir) whole_data_dir=${data_dir}_whole whole_data_id=${data_id}_whole if [ $stage -le -2 ]; then steps/segmentation/get_sad_map.py \ --init-sad-map="$sad_map" \ --map-noise-to-sil=$map_noise_to_sil \ --map-unk-to-speech=$map_unk_to_speech \ $lang | utils/sym2int.pl -f 1 $lang/phones.txt > $dir/sad_map +fi +if [ $stage -le -2 ]; then utils/data/convert_data_dir_to_whole.sh ${data_dir} ${whole_data_dir} - utils/data/get_utt2dur.sh ${whole_data_dir} fi if $speed_perturb; then @@ -232,11 +234,6 @@ if $speed_perturb; then data_id=${data_id}_sp fi - -############################################################################### -# Compute length of recording -############################################################################### - if [ 
$stage -le 0 ]; then utils/subsegment_data_dir.sh $whole_data_dir ${data_dir}/segments ${data_dir}/tmp cp $data_dir/tmp/feats.scp $data_dir @@ -300,14 +297,6 @@ if [ $stage -le 4 ]; then fi -#utils/split_data.sh --per-reco $data_dir $reco_nj -#segmentation-combine-segments ark,s:$vad_dir/sad_seg.scp -# "ark,s:segmentation-init-from-segments --shift-to-zero=false --frame-shift=$ali_frame_shift --frame-overlap=$ali_frame_overlap ${data}/split${reco_nj}reco/JOB/segments ark:- |" \ -# "ark:cat ${data}/split${reco_nj}reco/JOB/segments | cut -d ' ' -f 1,2 | utils/utt2spk_to_spk2utt.pl | sort -k1,1 |" ark:- - -############################################################################### - - # Create extended data directory that consists of the provided # segments along with the segments outside it. # This is basically dividing the whole recording into pieces @@ -320,49 +309,30 @@ fi outside_data_dir=$dir/${data_id}_outside if [ $stage -le 5 ]; then - rm -rf $outside_data_dir - mkdir -p $outside_data_dir/split${reco_nj}reco + rm -rf $outside_data_dir; mkdir -p $outside_data_dir for f in wav.scp reco2file_and_channel stm glm; do [ -f ${data_dir}/$f ] && cp ${data_dir}/$f $outside_data_dir done - steps/segmentation/split_data_on_reco.sh $data_dir $whole_data_dir $reco_nj - - for n in `seq $reco_nj`; do - dsn=$whole_data_dir/split${reco_nj}reco/$n - awk '{print $2}' $dsn/segments | \ - utils/filter_scp.pl /dev/stdin $whole_data_dir/utt2num_frames > \ - $dsn/utt2num_frames - mkdir -p $outside_data_dir/split${reco_nj}reco/$n - done + utils/data/get_utt2num_frames.sh $whole_data_dir - $cmd JOB=1:$reco_nj $outside_data_dir/log/get_empty_segments.JOB.log \ + $cmd $outside_data_dir/log/get_empty_segments.log \ segmentation-init-from-segments --frame-shift=$frame_shift \ - --frame-overlap=$frame_overlap --shift-to-zero=false \ - ${data_dir}/split${reco_nj}reco/JOB/segments ark:- \| \ + --frame-overlap=$frame_overlap --shift-to-zero=false \ + ${data_dir}/segments ark:- \| \ 
segmentation-combine-segments-to-recordings ark:- \ - "ark,t:cut -d ' ' -f 1,2 ${data_dir}/split${reco_nj}reco/JOB/segments | utils/utt2spk_to_spk2utt.pl |" ark:- \| \ + "ark,t:utils/data/get_reco2utt.sh ${data_dir} |" ark:- \| \ segmentation-create-subsegments --filter-label=1 --subsegment-label=0 \ - "ark:segmentation-init-from-lengths --label=1 ark,t:${whole_data_dir}/split${reco_nj}reco/JOB/utt2num_frames ark:- |" \ - ark:- ark:- \| \ + "ark:segmentation-init-from-lengths --label=1 ark,t:${whole_data_dir}/utt2num_frames ark:- |" \ + ark:- ark:- \| \ segmentation-post-process --remove-labels=0 --max-segment-length=1000 \ - --post-process-label=1 --overlap-length=50 \ - ark:- ark:- \| segmentation-to-segments --single-speaker=true \ - --frame-shift=$frame_shift --frame-overlap=$frame_overlap \ - ark:- ark,t:$outside_data_dir/split${reco_nj}reco/JOB/utt2spk \ - $outside_data_dir/split${reco_nj}reco/JOB/segments || exit 1 - - for n in `seq $reco_nj`; do - cat $outside_data_dir/split${reco_nj}reco/$n/utt2spk - done | sort -k1,1 > $outside_data_dir/utt2spk - - for n in `seq $reco_nj`; do - cat $outside_data_dir/split${reco_nj}reco/$n/segments - done | sort -k1,1 > $outside_data_dir/segments + --post-process-label=1 --overlap-length=50 ark:- ark:- \| \ + segmentation-to-segments --single-speaker=true \ + --frame-shift=$frame_shift --frame-overlap=$frame_overlap \ + ark:- ark,t:$outside_data_dir/utt2spk $outside_data_dir/segments utils/fix_data_dir.sh $outside_data_dir - fi @@ -378,8 +348,6 @@ if [ $stage -le 7 ]; then utils/fix_data_dir.sh $outside_data_dir utils/combine_data.sh $extended_data_dir $data_dir $outside_data_dir - - steps/segmentation/split_data_on_reco.sh $data_dir $extended_data_dir $reco_nj fi ############################################################################### @@ -442,15 +410,29 @@ reco_vad_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; echo $reco_nj > $reco_vad_dir/num_jobs if [ $stage -le 11 ]; then + 
utils/data/get_reco2utt.sh $extended_data_dir > $reco_vad_dir/reco2utt + splits= + for n in `seq $reco_nj`; do + splits="$splits $reco_vad_dir/reco2utt.$n.$reco_nj" + done + utils/split_scp.pl $reco_vad_dir/reco2utt $splits + + for n in `seq $reco_nj`; do + utils/spk2utt_to_utt2spk.pl $reco_vad_dir/reco2utt.$n.$reco_nj > $reco_vad_dir/utt2reco.$n.$reco_nj + done + $cmd JOB=1:$reco_nj $reco_vad_dir/log/intersect_vad.JOB.log \ - segmentation-intersect-segments --mismatch-label=10 \ - "scp:cat $vad_dir/sad_seg.scp $vad_dir/outside_sad_seg.scp | sort -k1,1 | utils/filter_scp.pl $extended_data_dir/split${reco_nj}reco/JOB/utt2spk |" \ - "scp:utils/filter_scp.pl $extended_data_dir/split${reco_nj}reco/JOB/utt2spk $decode_vad_dir/sad_seg.scp |" \ - ark:- \| segmentation-post-process --remove-labels=10 \ - --merge-adjacent-segments --max-intersegment-length=10 ark:- ark:- \| \ - segmentation-combine-segments ark:- "ark:segmentation-init-from-segments --shift-to-zero=false $extended_data_dir/split${reco_nj}reco/JOB/segments ark:- |" \ - ark,t:$extended_data_dir/split${reco_nj}reco/JOB/reco2utt \ - ark,scp:$reco_vad_dir/sad_seg.JOB.ark,$reco_vad_dir/sad_seg.JOB.scp + segmentation-intersect-segments --mismatch-label=1000 \ + "scp:cat $vad_dir/sad_seg.scp $vad_dir/outside_sad_seg.scp | sort -k1,1 | utils/filter_scp.pl $reco_vad_dir/utt2reco.JOB.$reco_nj |" \ + "scp:utils/filter_scp.pl $reco_vad_dir/utt2reco.JOB.$reco_nj $decode_vad_dir/sad_seg.scp |" \ + ark:- \| \ + segmentation-post-process --remove-labels=1000 \ + --merge-adjacent-segments --max-intersegment-length=10 ark:- ark:- \| \ + segmentation-combine-segments ark:- \ + "ark:utils/filter_scp.pl $reco_vad_dir/utt2reco.JOB.$reco_nj $extended_data_dir/segments | segmentation-init-from-segments --shift-to-zero=false - ark:- |" \ + ark,t:$reco_vad_dir/reco2utt.JOB.$reco_nj \ + ark,scp:$reco_vad_dir/sad_seg.JOB.ark,$reco_vad_dir/sad_seg.JOB.scp + for n in `seq $reco_nj`; do cat $reco_vad_dir/sad_seg.$n.scp done > 
$reco_vad_dir/sad_seg.scp @@ -464,51 +446,58 @@ for n in `seq $reco_nj`; do done set -e +# Deriv weights to train only on "good" frames, i.e. where alignment and decoding match if [ $stage -le 12 ]; then $cmd JOB=1:$reco_nj $reco_vad_dir/log/get_deriv_weights.JOB.log \ segmentation-post-process --merge-labels=0:1:2:3 --merge-dst-label=1 \ - scp:$reco_vad_dir/sad_seg.JOB.scp ark:- \| \ + scp:$reco_vad_dir/sad_seg.JOB.scp ark:- \| \ segmentation-to-ali --lengths-rspecifier=ark,t:${whole_data_dir}/utt2num_frames ark:- ark,t:- \| \ steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \ - ark,scp:$reco_vad_dir/deriv_weights.JOB.ark,$reco_vad_dir/deriv_weights.JOB.scp + ark,scp:$reco_vad_dir/deriv_weights.JOB.ark,$reco_vad_dir/deriv_weights.JOB.scp for n in `seq $reco_nj`; do cat $reco_vad_dir/deriv_weights.$n.scp done > $reco_vad_dir/deriv_weights.scp fi +# Deriv weights to train only on silence frames if [ $stage -le 13 ]; then $cmd JOB=1:$reco_nj $reco_vad_dir/log/get_deriv_weights_for_uncorrupted.JOB.log \ - segmentation-post-process --remove-labels=1:2:3 scp:$reco_vad_dir/sad_seg.JOB.scp \ - ark:- \| segmentation-post-process --merge-labels=0 --merge-dst-label=1 ark:- ark:- \| \ + segmentation-post-process --remove-labels=1:2:3 scp:$reco_vad_dir/sad_seg.JOB.scp ark:- \| \ + segmentation-post-process --merge-labels=0 --merge-dst-label=1 ark:- ark:- \| \ segmentation-to-ali --lengths-rspecifier=ark,t:${whole_data_dir}/utt2num_frames ark:- ark,t:- \| \ steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \ - ark,scp:$reco_vad_dir/deriv_weights_for_uncorrupted.JOB.ark,$reco_vad_dir/deriv_weights_for_uncorrupted.JOB.scp + ark,scp:$reco_vad_dir/deriv_weights_for_uncorrupted.JOB.ark,$reco_vad_dir/deriv_weights_for_uncorrupted.JOB.scp + for n in `seq $reco_nj`; do cat $reco_vad_dir/deriv_weights_for_uncorrupted.$n.scp done > $reco_vad_dir/deriv_weights_for_uncorrupted.scp fi +# Get per-frame SAD labels at recording-level if [ $stage -le 14 ]; then $cmd 
JOB=1:$reco_nj $reco_vad_dir/log/get_speech_labels.JOB.log \ segmentation-copy --keep-label=1 scp:$reco_vad_dir/sad_seg.JOB.scp ark:- \| \ segmentation-to-ali --lengths-rspecifier=ark,t:${whole_data_dir}/utt2num_frames \ - ark:- ark,scp:$reco_vad_dir/speech_labels.JOB.ark,$reco_vad_dir/speech_labels.JOB.scp + ark:- ark,scp:$reco_vad_dir/speech_labels.JOB.ark,$reco_vad_dir/speech_labels.JOB.scp + for n in `seq $reco_nj`; do cat $reco_vad_dir/speech_labels.$n.scp done > $reco_vad_dir/speech_labels.scp fi +# Deriv weights to train only on manual segments if [ $stage -le 15 ]; then $cmd JOB=1:$reco_nj $reco_vad_dir/log/convert_manual_segments_to_deriv_weights.JOB.log \ - segmentation-init-from-segments --shift-to-zero=false \ - $data_dir/split${reco_nj}reco/JOB/segments ark:- \| \ + segmentation-init-from-segments --shift-to-zero=false --frame-shift=$frame_shift --frame-overlap=$frame_overlap \ + "utils/filter_scp.pl $reco_vad_dir/utt2reco.JOB.$reco_nj $data_dir/segments |" ark:- \| \ segmentation-combine-segments-to-recordings ark:- \ - ark:$data_dir/split${reco_nj}reco/JOB/reco2utt ark:- \| \ + "ark,t:utils/data/get_reco2utt.sh $data_dir | utils/filter_scp.pl $reco_vad_dir/reco2utt.JOB.$reco_nj |" \ + ark:- \| \ segmentation-to-ali --lengths-rspecifier=ark,t:${whole_data_dir}/utt2num_frames \ - ark:- ark,t:- \| \ + ark:- ark,t:- \| \ steps/segmentation/convert_ali_to_vec.pl \| copy-vector ark,t:- \ - ark,scp:$reco_vad_dir/deriv_weights_manual_seg.JOB.ark,$reco_vad_dir/deriv_weights_manual_seg.JOB.scp + ark,scp:$reco_vad_dir/deriv_weights_manual_seg.JOB.ark,$reco_vad_dir/deriv_weights_manual_seg.JOB.scp for n in `seq $reco_nj`; do cat $reco_vad_dir/deriv_weights_manual_seg.$n.scp diff --git a/egs/wsj/s5/steps/segmentation/split_data_on_reco.sh b/egs/wsj/s5/steps/segmentation/split_data_on_reco.sh deleted file mode 100755 index b85c4493cdf..00000000000 --- a/egs/wsj/s5/steps/segmentation/split_data_on_reco.sh +++ /dev/null @@ -1,34 +0,0 @@ -#! 
/bin/bash - -# Copyright 2016 Vimal Manohar -# Apache 2.0 - -set -e - -if [ $# -ne 3 ]; then - echo "This script splits on recording-level in the same " - echo "way as is split." - echo "Usage: split_data_on_reco.sh " - exit 1 -fi - -ref_data=$1 -data=$2 -nj=$3 - -utils/data/get_reco2utt.sh $ref_data -utils/data/get_reco2utt.sh $data - -utils/split_data.sh --per-reco $ref_data $nj - -for n in `seq $nj`; do - srn=$ref_data/split${nj}reco/$n - dsn=$data/split${nj}reco/$n - - mkdir -p $dsn - - utils/data/get_reco2utt.sh $srn - utils/filter_scp.pl $srn/reco2utt $data/reco2utt > $dsn/reco2utt - utils/spk2utt_to_utt2spk.pl $dsn/reco2utt > $dsn/utt2reco - utils/subset_data_dir.sh --utt-list $dsn/utt2reco $data $dsn -done diff --git a/egs/wsj/s5/utils/split_data.sh b/egs/wsj/s5/utils/split_data.sh index 94ba4f555ce..bc5894e7551 100755 --- a/egs/wsj/s5/utils/split_data.sh +++ b/egs/wsj/s5/utils/split_data.sh @@ -16,28 +16,20 @@ # limitations under the License. split_per_spk=true -split_per_reco=false if [ "$1" == "--per-utt" ]; then split_per_spk=false shift -elif [ "$1" == "--per-reco" ]; then - split_per_spk=false - split_per_reco=true - shift fi if [ $# != 2 ]; then - echo "Usage: $0 [--per-utt|--per-reco] " + echo "Usage: $0 [--per-utt] " echo "E.g.: $0 data/train 50" echo "It creates its output in e.g. data/train/split50/{1,2,3,...50}, or if the " echo "--per-utt option was given, in e.g. data/train/split50utt/{1,2,3,...50}." - echo "If the --per-reco option was given, in e.g. data/train/split50reco/{1,2,3,...50}." echo "" echo "This script will not split the data-dir if it detects that the output is newer than the input." echo "By default it splits per speaker (so each speaker is in only one split dir)," echo "but with the --per-utt option it will ignore the speaker information while splitting." 
- echo "But if --per-reco option is given, it splits per recording " - echo "(so each recording is in only one split dir)" exit 1 fi @@ -75,14 +67,10 @@ if [ -f $data/text ] && [ $nu -ne $nt ]; then echo "** use utils/fix_data_dir.sh to fix this." fi + if $split_per_spk; then utt2spk_opt="--utt2spk=$data/utt2spk" utt="" -elif $split_per_reco; then - utils/data/get_reco2utt.sh $data - utils/spk2utt_to_utt2spk.pl $data/reco2utt > $data/utt2reco - utt2spk_opt="--utt2spk=$data/utt2reco" - utt="reco" else utt2spk_opt= utt="utt" @@ -106,7 +94,6 @@ if ! $need_to_split; then fi utt2spks=$(for n in `seq $numsplit`; do echo $data/split${numsplit}${utt}/$n/utt2spk; done) -utt2recos=$(for n in `seq $numsplit`; do echo $data/split${numsplit}${utt}/$n/utt2reco; done) directories=$(for n in `seq $numsplit`; do echo $data/split${numsplit}${utt}/$n; done) @@ -121,20 +108,11 @@ fi which lockfile >&/dev/null && lockfile -l 60 $data/.split_lock trap 'rm -f $data/.split_lock' EXIT HUP INT PIPE TERM -if $split_per_reco; then - utils/split_scp.pl $utt2spk_opt $data/utt2reco $utt2recos || exit 1 -else - utils/split_scp.pl $utt2spk_opt $data/utt2spk $utt2spks || exit 1 -fi +utils/split_scp.pl $utt2spk_opt $data/utt2spk $utt2spks || exit 1 for n in `seq $numsplit`; do dsn=$data/split${numsplit}${utt}/$n - - if $split_per_reco; then - utils/filter_scp.pl $dsn/utt2reco $data/utt2spk > $dsn/utt2spk - fi - - utils/utt2spk_to_spk2utt.pl $dsn/utt2spk > $dsn/spk2utt || exit 1 + utils/utt2spk_to_spk2utt.pl $dsn/utt2spk > $dsn/spk2utt || exit 1; done maybe_wav_scp= @@ -176,12 +154,6 @@ if [ -f $data/segments ]; then $data/split${numsplit}${utt}/JOB/tmp.reco $data/wav.scp \ $data/split${numsplit}${utt}/JOB/wav.scp || exit 1 fi - if [ -f $data/reco2utt ]; then - utils/filter_scps.pl JOB=1:$numsplit \ - $data/split${numsplit}${utt}/JOB/tmp.reco $data/reco2utt \ - $data/split${numsplit}${utt}/JOB/reco2utt || exit 1 - fi - for f in $data/split${numsplit}${utt}/*/tmp.reco; do rm $f; done fi