Skip to content

Commit

Permalink
[egs] fixes to babel pipeline; thanks to Fred Richardson (#1509)
Browse files Browse the repository at this point in the history
  • Loading branch information
jtrmal authored and danpovey committed Mar 23, 2017
1 parent ef415a7 commit 9a6feea
Show file tree
Hide file tree
Showing 57 changed files with 4,359 additions and 190 deletions.
4 changes: 2 additions & 2 deletions egs/babel/s5d/conf/common.fullLP
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ babel_type=full

use_pitch=true

lmwt_plp_extra_opts=( --min-lmwt 8 --max-lmwt 18 )
lmwt_plp_extra_opts=( --min-lmwt 9 --max-lmwt 13 )
lmwt_bnf_extra_opts=( --min-lmwt 15 --max-lmwt 22 )
lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 15 )
lmwt_chain_extra_opts=( --min-lmwt 4 --max-lmwt 22 )
lmwt_chain_extra_opts=( --min-lmwt 9 --max-lmwt 13 )

dnn_beam=16.0
dnn_lat_beam=8.5
Expand Down
1 change: 1 addition & 0 deletions egs/babel/s5d/conf/common_vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ cer=0

#Declaring here to make the definition inside the language conf files more
# transparent and nice
declare -A train_kwlists
declare -A dev10h_kwlists
declare -A dev2h_kwlists
declare -A evalpart1_kwlists
Expand Down
4 changes: 2 additions & 2 deletions egs/babel/s5d/conf/lang/104-pashto-fullLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#speech corpora files location
train_data_dir=/export/babel/data/104-pashto/release-current/conversational/training
train_data_list=/export/babel/data/splits/Pashto_Babel104/train.FullLP.list
train_data_list=./conf/lists/104-pashto/training.list
train_nj=32

#RADICAL DEV2H data files
Expand All @@ -22,7 +22,7 @@ dev2h_nj=18

#Official DEV data files
dev10h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Pashto_Babel104/dev.list
dev10h_data_list=./conf/lists/104-pashto/dev.list
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm
Expand Down
9 changes: 8 additions & 1 deletion egs/babel/s5d/conf/lang/105-turkish-fullLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@
#speech corpora files location
train_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/training
train_data_list=/export/babel/data/splits/Turkish_Babel105/train.fullLP.list
train_nj=32
#train_nj=32
train_ecf_file=./data/train/ecf.train.xml
train_rttm_file=./exp/tri5/rttm
train_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
)
train_nj=64

#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev
Expand Down
18 changes: 10 additions & 8 deletions egs/babel/s5d/conf/lang/305-guarani.FLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ train_nj=32
#Radical reduced DEV corpora files location
dev2h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev
dev2h_data_list=./conf/lists/305-guarani//dev.2h.list
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.mitllfa3.rttm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.stm
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.mitllfa3.rttm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.stm
dev2h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev.kwlist3.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true
Expand All @@ -24,11 +25,12 @@ dev2h_subset_ecf=true
#Official DEV corpora files location
dev10h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev
dev10h_data_list=./conf/lists/305-guarani//dev.list
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.mitllfa3.rttm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.stm
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.mitllfa3.rttm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.stm
dev10h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev.kwlist3.xml
) # dev10h_kwlists
dev10h_nj=32

Expand Down
6 changes: 4 additions & 2 deletions egs/babel/s5d/conf/lang/306-igbo.FLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-de
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm
dev2h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev.kwlist3.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true
Expand All @@ -28,7 +29,8 @@ dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-d
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm
dev10h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev.kwlist3.xml
) # dev10h_kwlists
dev10h_nj=32

Expand Down
10 changes: 8 additions & 2 deletions egs/babel/s5d/conf/lang/307-amharic.FLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-de
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
dev2h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev.kwlist4.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true
Expand All @@ -28,7 +29,8 @@ dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-d
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
dev10h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev.kwlist4.xml
) # dev10h_kwlists
dev10h_nj=32

Expand All @@ -42,5 +44,9 @@ unsup_nj=32
lexicon_file=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/reference_materials/lexicon.txt
lexiconFlags="--romanized --oov <unk>"

extlex_proxy_phone_beam=5
extlex_proxy_phone_nbest=300
extlex_proxy_beam=-1
extlex_proxy_nbest=-1


6 changes: 4 additions & 2 deletions egs/babel/s5d/conf/lang/401-mongolian.FLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-de
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
dev2h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev.kwlist3.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true
Expand All @@ -28,7 +29,8 @@ dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-d
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
dev10h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev.kwlist3.xml
) # dev10h_kwlists
dev10h_nj=32

Expand Down
8 changes: 4 additions & 4 deletions egs/babel/s5d/conf/lang/402-javanese.FLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-de
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm
dev2h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev.kwlist3.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true
Expand All @@ -28,9 +29,8 @@ dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-d
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm
dev10h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
[kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist3.xml

[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev.kwlist3.xml
) # dev10h_kwlists
dev10h_nj=32

Expand Down
6 changes: 4 additions & 2 deletions egs/babel/s5d/conf/lang/403-dholuo.FLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-de
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm
dev2h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev.kwlist4.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true
Expand All @@ -28,7 +29,8 @@ dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-d
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm
dev10h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev.kwlist4.xml
) # dev10h_kwlists
dev10h_nj=32

Expand Down
78 changes: 78 additions & 0 deletions egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Language configuration for 404-georgian (FLP): locations of the speech
# corpora, file lists, scoring files and keyword lists for each dataset.
# include common settings for fullLP systems.
. conf/common.fullLP || exit 1;


#speech corpora files location
train_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/training
train_data_list=./conf/lists/404-georgian//training.list
train_nj=32


#Radical reduced DEV corpora files location
# (uses the same data dir and scoring files as dev10h below, but a different
# file list)
dev2h_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
dev2h_data_list=./conf/lists/404-georgian//dev.2h.list
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
# keyword lists, keyed by list name
dev2h_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev.kwlist3.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true


#Official DEV corpora files location
dev10h_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
dev10h_data_list=./conf/lists/404-georgian//dev.list
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
dev10h_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev.kwlist3.xml
) # dev10h_kwlists
dev10h_nj=32


#Official EVAL period evaluation data files
# (no rttm/stm files are configured for this dataset; the keyword lists are
# the same conv-dev lists used by dev2h/dev10h)
eval_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/eval
eval_data_list=./conf/lists/404-georgian//eval.list
eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-eval.ecf.xml
eval_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev.kwlist3.xml
) # eval_kwlists
eval_nj=32


#Shadow data files
# (the dev and eval data dirs/lists combined; scoring files are the conv-dev ones)
shadow_data_dir=(
/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/eval
) # shadow_data_dir
shadow_data_list=(
./conf/lists/404-georgian//dev.list
./conf/lists/404-georgian//eval.list
) # shadow_data_list
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
shadow_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev.kwlist3.xml
) # shadow_kwlists
shadow_nj=32


#Unsupervised dataset for FullLP condition
unsup_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/untranscribed-training
unsup_data_list=./conf/lists/404-georgian//untranscribed-training.list
unsup_nj=32


# NOTE(review): lexicon_file is left empty in this config -- confirm that the
# scripts consuming it handle an empty value.
lexicon_file=
lexiconFlags="--romanized --oov <unk>"



54 changes: 54 additions & 0 deletions egs/babel/s5d/conf/lang/404-georgian.LLP.official.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Language configuration for 404-georgian (LLP): locations of the speech
# corpora, file lists, scoring files and keyword lists for each dataset.
# include common settings for limitedLP systems.
. conf/common.limitedLP || exit 1;


#speech corpora files location
# (same training data dir as the FLP config, restricted by the sub-train list)
train_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/training
train_data_list=./conf/lists/404-georgian//sub-train.list
train_nj=32


#Radical reduced DEV corpora files location
dev2h_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
dev2h_data_list=./conf/lists/404-georgian//dev.2h.list
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
# NOTE(review): the [eval] entry below points at
# .../IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist3.xml,
# whereas 404-georgian.FLP.official.conf uses
# .../IndusDB/IARPA-babel404b-v1.0a_conv-dev.kwlist3.xml -- confirm which
# path is the intended one (same applies to dev10h_kwlists).
dev2h_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist3.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true


#Official DEV corpora files location
dev10h_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
dev10h_data_list=./conf/lists/404-georgian//dev.list
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
dev10h_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist3.xml
) # dev10h_kwlists
dev10h_nj=32


#Unsupervised dataset for LimitedLP condition
# Two parallel lists: the n-th entry of unsup_data_list goes with the n-th
# entry of unsup_data_dir (untranscribed-training, then the part of the
# training dir selected by sub-train.untranscribed.list).
unsup_data_list=(
./conf/lists/404-georgian//untranscribed-training.list
./conf/lists/404-georgian//sub-train.untranscribed.list
) # unsup_data_list
unsup_data_dir=(
/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/untranscribed-training
/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/training
) # unsup_data_dir
unsup_nj=32


# NOTE(review): lexicon_file is left empty in this config -- confirm that the
# scripts consuming it handle an empty value.
lexicon_file=
lexiconFlags="--romanized --oov <unk>"



Loading

0 comments on commit 9a6feea

Please sign in to comment.