Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixes to babel pipeline #1509

Merged
merged 2 commits into from
Mar 23, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions egs/babel/s5d/conf/common.fullLP
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ babel_type=full

use_pitch=true

lmwt_plp_extra_opts=( --min-lmwt 8 --max-lmwt 18 )
lmwt_plp_extra_opts=( --min-lmwt 9 --max-lmwt 13 )
lmwt_bnf_extra_opts=( --min-lmwt 15 --max-lmwt 22 )
lmwt_dnn_extra_opts=( --min-lmwt 10 --max-lmwt 15 )
lmwt_chain_extra_opts=( --min-lmwt 4 --max-lmwt 22 )
lmwt_chain_extra_opts=( --min-lmwt 9 --max-lmwt 13 )

dnn_beam=16.0
dnn_lat_beam=8.5
Expand Down
1 change: 1 addition & 0 deletions egs/babel/s5d/conf/common_vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ cer=0

#Declaring here to make the definition inside the language conf files more
# transparent and nice
declare -A train_kwlists
declare -A dev10h_kwlists
declare -A dev2h_kwlists
declare -A evalpart1_kwlists
Expand Down
4 changes: 2 additions & 2 deletions egs/babel/s5d/conf/lang/104-pashto-fullLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#speech corpora files location
train_data_dir=/export/babel/data/104-pashto/release-current/conversational/training
train_data_list=/export/babel/data/splits/Pashto_Babel104/train.FullLP.list
train_data_list=./conf/lists/104-pashto/training.list
train_nj=32

#RADICAL DEV2H data files
Expand All @@ -22,7 +22,7 @@ dev2h_nj=18

#Official DEV data files
dev10h_data_dir=/export/babel/data/104-pashto/release-current/conversational/dev
dev10h_data_list=/export/babel/data/splits/Pashto_Babel104/dev.list
dev10h_data_list=./conf/lists/104-pashto/dev.list
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.stm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev.ecf.xml
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel104b-v0.4bY_conv-dev/IARPA-babel104b-v0.4bY_conv-dev.mitllfa3.rttm
Expand Down
9 changes: 8 additions & 1 deletion egs/babel/s5d/conf/lang/105-turkish-fullLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@
#speech corpora files location
train_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/training
train_data_list=/export/babel/data/splits/Turkish_Babel105/train.fullLP.list
train_nj=32
#train_nj=32
train_ecf_file=./data/train/ecf.train.xml
train_rttm_file=./exp/tri5/rttm
train_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel105b-v0.4_conv-dev.kwlist2.xml
)
train_nj=64

#RADICAL DEV data files
dev2h_data_dir=/export/babel/data/105-turkish/release-current-b/conversational/dev
Expand Down
18 changes: 10 additions & 8 deletions egs/babel/s5d/conf/lang/305-guarani.FLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ train_nj=32
#Radical reduced DEV corpora files location
dev2h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev
dev2h_data_list=./conf/lists/305-guarani//dev.2h.list
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.mitllfa3.rttm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.stm
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.mitllfa3.rttm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.stm
dev2h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev.kwlist3.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true
Expand All @@ -24,11 +25,12 @@ dev2h_subset_ecf=true
#Official DEV corpora files location
dev10h_data_dir=/export/babel/data/305-guarani/IARPA-babel305b-v1.0b-build/BABEL_OP3_305/conversational/dev
dev10h_data_list=./conf/lists/305-guarani//dev.list
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.mitllfa3.rttm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.stm
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.mitllfa3.rttm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.stm
dev10h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0a_conv-dev/IARPA-babel305b-v1.0a_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev/IARPA-babel305b-v1.0c_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel305b-v1.0c_conv-dev.kwlist3.xml
) # dev10h_kwlists
dev10h_nj=32

Expand Down
6 changes: 4 additions & 2 deletions egs/babel/s5d/conf/lang/306-igbo.FLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-de
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm
dev2h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev.kwlist3.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true
Expand All @@ -28,7 +29,8 @@ dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-d
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.stm
dev10h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev/IARPA-babel306b-v2.0c_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel306b-v2.0c_conv-dev.kwlist3.xml
) # dev10h_kwlists
dev10h_nj=32

Expand Down
10 changes: 8 additions & 2 deletions egs/babel/s5d/conf/lang/307-amharic.FLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-de
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
dev2h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev.kwlist4.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true
Expand All @@ -28,7 +29,8 @@ dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-d
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.stm
dev10h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev/IARPA-babel307b-v1.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel307b-v1.0b_conv-dev.kwlist4.xml
) # dev10h_kwlists
dev10h_nj=32

Expand All @@ -42,5 +44,9 @@ unsup_nj=32
lexicon_file=/export/babel/data/307-amharic/IARPA-babel307b-v1.0b-build/BABEL_OP3_307/conversational/reference_materials/lexicon.txt
lexiconFlags="--romanized --oov <unk>"

extlex_proxy_phone_beam=5
extlex_proxy_phone_nbest=300
extlex_proxy_beam=-1
extlex_proxy_nbest=-1


6 changes: 4 additions & 2 deletions egs/babel/s5d/conf/lang/401-mongolian.FLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-de
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
dev2h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev.kwlist3.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true
Expand All @@ -28,7 +29,8 @@ dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-d
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.stm
dev10h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev/IARPA-babel401b-v2.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel401b-v2.0b_conv-dev.kwlist3.xml
) # dev10h_kwlists
dev10h_nj=32

Expand Down
8 changes: 4 additions & 4 deletions egs/babel/s5d/conf/lang/402-javanese.FLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-de
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm
dev2h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev.kwlist3.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true
Expand All @@ -28,9 +29,8 @@ dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-d
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.stm
dev10h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
[kwlist3]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist3.xml

[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev/IARPA-babel402b-v1.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel402b-v1.0b_conv-dev.kwlist3.xml
) # dev10h_kwlists
dev10h_nj=32

Expand Down
6 changes: 4 additions & 2 deletions egs/babel/s5d/conf/lang/403-dholuo.FLP.official.conf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-de
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm
dev2h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev.kwlist4.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true
Expand All @@ -28,7 +29,8 @@ dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-d
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.stm
dev10h_kwlists=(
[kwlist]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev/IARPA-babel403b-v1.0b_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel403b-v1.0b_conv-dev.kwlist4.xml
) # dev10h_kwlists
dev10h_nj=32

Expand Down
78 changes: 78 additions & 0 deletions egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# include common settings for fullLP systems.
. conf/common.fullLP || exit 1;


#speech corpora files location
train_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/training
train_data_list=./conf/lists/404-georgian//training.list
train_nj=32


#Radical reduced DEV corpora files location
dev2h_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
dev2h_data_list=./conf/lists/404-georgian//dev.2h.list
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
dev2h_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev.kwlist3.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true


#Official DEV corpora files location
dev10h_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
dev10h_data_list=./conf/lists/404-georgian//dev.list
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
dev10h_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev.kwlist3.xml
) # dev10h_kwlists
dev10h_nj=32


#Official EVAL period evaluation data files
eval_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/eval
eval_data_list=./conf/lists/404-georgian//eval.list
eval_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-eval.ecf.xml
eval_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev.kwlist3.xml
) # eval_kwlists
eval_nj=32


#Shadow data files
shadow_data_dir=(
/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/eval
) # shadow_data_dir
shadow_data_list=(
./conf/lists/404-georgian//dev.list
./conf/lists/404-georgian//eval.list
) # shadow_data_dir
shadow_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
shadow_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
shadow_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
shadow_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev.kwlist3.xml
) # shadow_kwlists
shadow_nj=32


#Unsupervised dataset for FullLP condition
unsup_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/untranscribed-training
unsup_data_list=./conf/lists/404-georgian//untranscribed-training.list
unsup_nj=32


lexicon_file=
lexiconFlags="--romanized --oov <unk>"



54 changes: 54 additions & 0 deletions egs/babel/s5d/conf/lang/404-georgian.LLP.official.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# include common settings for fullLP systems.
. conf/common.limitedLP || exit 1;


#speech corpora files location
train_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/training
train_data_list=./conf/lists/404-georgian//sub-train.list
train_nj=32


#Radical reduced DEV corpora files location
dev2h_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
dev2h_data_list=./conf/lists/404-georgian//dev.2h.list
dev2h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
dev2h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
dev2h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
dev2h_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist3.xml
) # dev2h_kwlists
dev2h_nj=16
dev2h_subset_ecf=true


#Official DEV corpora files location
dev10h_data_dir=/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/dev
dev10h_data_list=./conf/lists/404-georgian//dev.list
dev10h_rttm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.mitllfa3.rttm
dev10h_ecf_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.scoring.ecf.xml
dev10h_stm_file=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.stm
dev10h_kwlists=(
[dev]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist.xml
[eval]=/export/babel/data/scoring/IndusDB/IARPA-babel404b-v1.0a_conv-dev/IARPA-babel404b-v1.0a_conv-dev.annot.kwlist3.xml
) # dev10h_kwlists
dev10h_nj=32


#Unsupervised dataset for LimitedLP condition
unsup_data_list=(
./conf/lists/404-georgian//untranscribed-training.list
./conf/lists/404-georgian//sub-train.untranscribed.list
) # unsup_data_list
unsup_data_dir=(
/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/untranscribed-training
/export/babel/data//404-georgian/IARPA-babel404b-v1.0a-build/BABEL_OP3_404/conversational/training
) # unsup_data_dir
unsup_nj=32


lexicon_file=
lexiconFlags="--romanized --oov <unk>"



Loading