-
Notifications
You must be signed in to change notification settings - Fork 5.3k
/
perturb_data_dir_speed_3way.sh
executable file
·89 lines (72 loc) · 3.75 KB
/
perturb_data_dir_speed_3way.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env bash
# Copyright 2016-2018 Johns Hopkins University (author: Daniel Povey)
# 2018 Hossein Hadian
# Apache 2.0
# This script does the standard 3-way speed perturbing of
# a data directory (it operates on the wav.scp).
# If you add the option "--always-include-prefix true", it will include the
# prefix "sp1.0-" for the original un-perturbed data. This can help resolve
# problems with sorting.
# We don't make '--always-include-prefix true' the default behavior because
# it can break some older scripts that relied on the original utterance-ids
# being a subset of the perturbed data's utterance-ids.
# Option defaults. They are assigned before sourcing utils/parse_options.sh,
# which is expected to overwrite them from the matching command-line options
# (--always-include-prefix / --include-spk-prefix, see the usage text below).
always_include_prefix=false
include_spk_prefix=true

. utils/parse_options.sh

# Exactly two positional arguments (<srcdir> <destdir>) must remain; anything
# else prints the usage text and stops with status 1.
if (( $# != 2 )); then
  printf '%s\n' \
    "Usage: perturb_data_dir_speed_3way.sh <srcdir> <destdir>" \
    "Applies standard 3-way speed perturbation using factors of 0.9, 1.0 and 1.1." \
    "e.g.:" \
    " $0 [options] data/train data/train_sp" \
    "Note: if <destdir>/feats.scp already exists, this will refuse to run." \
    "Options:" \
    " --always-include-prefix [true|false] # default: false. If set to true," \
    " # it will add the prefix 'sp1.0-' to" \
    " # utterance and speaker-ids for data at" \
    " # the original speed. Can resolve" \
    " # issues RE data sorting." \
    " --include-spk-prefix [true|false] # default: true. If set to true," \
    " # it will add the prefix 'sp-' to" \
    " # speaker-ids, making number of speakers" \
    " # 3 times more. This is useful for speaker" \
    " # adaptation part of ASR."
  exit 1
fi
# Positional arguments: the data directory to read and the directory to create.
srcdir=$1
destdir=$2

# The script operates on the wav.scp, so the source directory must contain one.
# The path is quoted so that the test still works if it contains whitespace.
if [ ! -f "$srcdir/wav.scp" ]; then
  echo "$0: expected $srcdir/wav.scp to exist"
  exit 1
fi

# Refuse to run if the destination already has a feats.scp; the user has to
# delete that file explicitly before re-running.
if [ -f "$destdir/feats.scp" ]; then
  echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)"
  exit 1
fi
# we need to make sure all files in source directory are in sorted order
utils/fix_data_dir.sh "${srcdir}" || exit 1

echo "$0: making sure the utt2dur and the reco2dur files are present"
echo "... in ${srcdir}, because obtaining it after speed-perturbing"
echo "... would be very slow, and you might need them."

# These two steps are best-effort: a failure is reported on stderr instead of
# being silently ignored, but it does not abort the script.
utils/data/get_utt2dur.sh "${srcdir}" \
  || echo "$0: warning: utils/data/get_utt2dur.sh failed for ${srcdir}; continuing" >&2
utils/data/get_reco2dur.sh "${srcdir}" \
  || echo "$0: warning: utils/data/get_reco2dur.sh failed for ${srcdir}; continuing" >&2
# Create the 0.9x and 1.1x versions in temporary directories named after
# <destdir>; either failure aborts the script.
utils/data/perturb_data_dir_speed.sh --include-spk-prefix "$include_spk_prefix" 0.9 "${srcdir}" "${destdir}_speed0.9" || exit 1
utils/data/perturb_data_dir_speed.sh --include-spk-prefix "$include_spk_prefix" 1.1 "${srcdir}" "${destdir}_speed1.1" || exit 1

if $always_include_prefix; then
  # Original-speed data gets its own copy whose utterance- and speaker-ids
  # carry the 'sp1.0-' prefix.
  utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- "${srcdir}" "${destdir}_speed1.0" || exit 1

  # Write utt2uniq for the prefixed copy: each line is the prefixed utterance-id
  # followed by either the un-prefixed utterance-id (when the source has no
  # utt2uniq) or the value the source's utt2uniq already had for it.
  if [ ! -f "$srcdir/utt2uniq" ]; then
    awk '{printf("sp1.0-%s %s\n", $1, $1);}' < "$srcdir/utt2spk" > "${destdir}_speed1.0/utt2uniq" || exit 1
  else
    awk '{printf("sp1.0-%s %s\n", $1, $2);}' < "$srcdir/utt2uniq" > "${destdir}_speed1.0/utt2uniq" || exit 1
  fi

  utils/data/combine_data.sh "$destdir" "${destdir}_speed1.0" "${destdir}_speed0.9" "${destdir}_speed1.1" || exit 1
  # The temporary directories are only removed once the combination succeeded.
  rm -r "${destdir}_speed0.9" "${destdir}_speed1.1" "${destdir}_speed1.0"
else
  # Without the prefix option the source directory itself is used as the
  # original-speed part.
  utils/data/combine_data.sh "$destdir" "${srcdir}" "${destdir}_speed0.9" "${destdir}_speed1.1" || exit 1
  rm -r "${destdir}_speed0.9" "${destdir}_speed1.1"
fi
echo "$0: generated 3-way speed-perturbed version of data in $srcdir, in $destdir"

# Final check of the generated directory (features and text are not required).
# The path is quoted so it reaches the validator as a single argument.
if ! utils/validate_data_dir.sh --no-feats --no-text "$destdir"; then
  echo "$0: Validation failed. If it is a sorting issue, try the option '--always-include-prefix true'."
  exit 1
fi

exit 0