Merge branch 'master' of github.com:kaldi-asr/kaldi
vimalmanohar committed Dec 27, 2017
2 parents d1ba027 + a5561c3 commit b4b42cc
Showing 17 changed files with 246 additions and 1,757 deletions.
3 changes: 3 additions & 0 deletions egs/wsj/s5/steps/info/chain_dir_info.pl
@@ -137,6 +137,9 @@ sub get_combine_info {
if (m/Combining nnets, objective function changed from (\S+) to (\S+)/) {
close(F);
return sprintf(" combine=%.3f->%.3f", $1, $2);
} elsif (m/Combining (\S+) nnets, objective function changed from (\S+) to (\S+)/) {
close(F);
return sprintf(" combine=%.3f->%.3f (over %d)", $2, $3, $1);
}
}
}
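For context, the new elsif branch (mirrored in nnet3_dir_info.pl below) matches the summary line that the combination binaries now log, which also reports how many models went into the combination. A minimal Python sketch of the same parsing logic; the sample log line is illustrative and its numbers are invented:

    import re

    # Illustrative log line; the wording matches the KALDI_LOG summary
    # emitted by the new combination binaries, but the numbers are made up.
    line = "Combining 12 nnets, objective function changed from -0.0831 to -0.0815"

    m = re.search(r"Combining (\S+) nnets, objective function changed "
                  r"from (\S+) to (\S+)", line)
    if m:
        num_models, objf_from, objf_to = m.groups()
        # Mirrors: sprintf(" combine=%.3f->%.3f (over %d)", $2, $3, $1)
        print(" combine=%.3f->%.3f (over %d)"
              % (float(objf_from), float(objf_to), int(num_models)))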
3 changes: 3 additions & 0 deletions egs/wsj/s5/steps/info/nnet3_dir_info.pl
@@ -137,6 +137,9 @@ sub get_combine_info {
if (m/Combining nnets, objective function changed from (\S+) to (\S+)/) {
close(F);
return sprintf(" combine=%.2f->%.2f", $1, $2);
} elsif (m/Combining (\S+) nnets, objective function changed from (\S+) to (\S+)/) {
close(F);
return sprintf(" combine=%.2f->%.2f (over %d)", $2, $3, $1);
}
}
}
17 changes: 4 additions & 13 deletions egs/wsj/s5/steps/libs/nnet3/train/chain_objf/acoustic_model.py
@@ -492,7 +492,7 @@ def compute_progress(dir, iter, run_opts):
def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_str,
egs_dir, leaky_hmm_coefficient, l2_regularize,
xent_regularize, run_opts,
sum_to_one_penalty=0.0):
max_objective_evaluations=30):
""" Function to do model combination
In the nnet3 setup, the logic
@@ -505,9 +505,6 @@ def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_st

models_to_combine.add(num_iters)

# TODO: if it turns out the sum-to-one-penalty code is not useful,
# remove support for it.

for iter in sorted(models_to_combine):
model_file = '{0}/{1}.mdl'.format(dir, iter)
if os.path.exists(model_file):
@@ -528,12 +525,9 @@ def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_st

common_lib.execute_command(
"""{command} {combine_queue_opt} {dir}/log/combine.log \
nnet3-chain-combine --num-iters={opt_iters} \
nnet3-chain-combine \
--max-objective-evaluations={max_objective_evaluations} \
--l2-regularize={l2} --leaky-hmm-coefficient={leaky} \
--separate-weights-per-component={separate_weights} \
--enforce-sum-to-one={hard_enforce} \
--sum-to-one-penalty={penalty} \
--enforce-positive-weights=true \
--verbose=3 {dir}/den.fst {raw_models} \
"ark,bg:nnet3-chain-copy-egs ark:{egs_dir}/combine.cegs ark:- | \
nnet3-chain-merge-egs --minibatch-size={num_chunk_per_mb} \
@@ -542,12 +536,9 @@ def combine_models(dir, num_iters, models_to_combine, num_chunk_per_minibatch_st
{dir}/final.mdl""".format(
command=run_opts.command,
combine_queue_opt=run_opts.combine_queue_opt,
opt_iters=(20 if sum_to_one_penalty <= 0 else 80),
separate_weights=(sum_to_one_penalty > 0),
max_objective_evaluations=max_objective_evaluations,
l2=l2_regularize, leaky=leaky_hmm_coefficient,
dir=dir, raw_models=" ".join(raw_model_strings),
hard_enforce=(sum_to_one_penalty <= 0),
penalty=sum_to_one_penalty,
num_chunk_per_mb=num_chunk_per_minibatch_str,
num_iters=num_iters,
egs_dir=egs_dir))
14 changes: 11 additions & 3 deletions egs/wsj/s5/steps/libs/nnet3/train/common.py
@@ -852,6 +852,16 @@ def __init__(self,
the final model combination stage. These
models will themselves be averages of
iteration-number ranges""")
self.parser.add_argument("--trainer.optimization.max-objective-evaluations",
"--trainer.max-objective-evaluations",
type=int, dest='max_objective_evaluations',
default=30,
help="""The maximum number of objective
evaluations in order to figure out the
best number of models to combine. It helps to
speedup if the number of models provided to the
model combination binary is quite large (e.g.
several hundred).""")
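A hypothetical invocation showing how the new option above would be passed to one of the training scripts (the script path and the omitted options are placeholders, not taken from this commit; --trainer.max-objective-evaluations is registered as an alias):

    steps/nnet3/train_dnn.py --trainer.optimization.max-objective-evaluations=30 ...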
self.parser.add_argument("--trainer.optimization.do-final-combination",
dest='do_final_combination', type=str,
action=common_lib.StrToBoolAction,
@@ -861,9 +871,7 @@ def __init__(self,
last-numbered model as the final.mdl).""")
self.parser.add_argument("--trainer.optimization.combine-sum-to-one-penalty",
type=float, dest='combine_sum_to_one_penalty', default=0.0,
help="""If > 0, activates 'soft' enforcement of the
sum-to-one penalty in combination (may be helpful
if using dropout). E.g. 1.0e-03.""")
help="""This option is deprecated and does nothing.""")
self.parser.add_argument("--trainer.optimization.momentum", type=float,
dest='momentum', default=0.0,
help="""Momentum used in update computation.
11 changes: 4 additions & 7 deletions egs/wsj/s5/steps/libs/nnet3/train/frame_level_objf/common.py
@@ -452,7 +452,7 @@ def combine_models(dir, num_iters, models_to_combine, egs_dir,
minibatch_size_str,
run_opts,
chunk_width=None, get_raw_nnet_from_am=True,
sum_to_one_penalty=0.0,
max_objective_evaluations=30,
use_multitask_egs=False,
compute_per_dim_accuracy=False):
""" Function to do model combination
@@ -501,10 +501,8 @@ def combine_models(dir, num_iters, models_to_combine, egs_dir,
use_multitask_egs=use_multitask_egs)
common_lib.execute_command(
"""{command} {combine_queue_opt} {dir}/log/combine.log \
nnet3-combine --num-iters=80 \
--enforce-sum-to-one={hard_enforce} \
--sum-to-one-penalty={penalty} \
--enforce-positive-weights=true \
nnet3-combine \
--max-objective-evaluations={max_objective_evaluations} \
--verbose=3 {raw_models} \
"ark,bg:nnet3-copy-egs {multitask_egs_opts} \
{egs_rspecifier} ark:- | \
@@ -513,9 +511,8 @@ def combine_models(dir, num_iters, models_to_combine, egs_dir,
""".format(command=run_opts.command,
combine_queue_opt=run_opts.combine_queue_opt,
dir=dir, raw_models=" ".join(raw_model_strings),
max_objective_evaluations=max_objective_evaluations,
egs_rspecifier=egs_rspecifier,
hard_enforce=(sum_to_one_penalty <= 0),
penalty=sum_to_one_penalty,
mbsize=minibatch_size_str,
out_model=out_model,
multitask_egs_opts=multitask_egs_opts))
2 changes: 1 addition & 1 deletion egs/wsj/s5/steps/nnet3/chain/train.py
@@ -554,7 +554,7 @@ def train(args, run_opts):
l2_regularize=args.l2_regularize,
xent_regularize=args.xent_regularize,
run_opts=run_opts,
sum_to_one_penalty=args.combine_sum_to_one_penalty)
max_objective_evaluations=args.max_objective_evaluations)
else:
logger.info("Copying the last-numbered model to final.mdl")
common_lib.force_symlink("{0}.mdl".format(num_iters),
2 changes: 1 addition & 1 deletion egs/wsj/s5/steps/nnet3/train_dnn.py
@@ -364,7 +364,7 @@ def train(args, run_opts):
models_to_combine=models_to_combine,
egs_dir=egs_dir,
minibatch_size_str=args.minibatch_size, run_opts=run_opts,
sum_to_one_penalty=args.combine_sum_to_one_penalty)
max_objective_evaluations=args.max_objective_evaluations)

if args.stage <= num_iters + 1:
logger.info("Getting average posterior for purposes of "
2 changes: 1 addition & 1 deletion egs/wsj/s5/steps/nnet3/train_raw_dnn.py
@@ -398,7 +398,7 @@ def train(args, run_opts):
models_to_combine=models_to_combine, egs_dir=egs_dir,
minibatch_size_str=args.minibatch_size, run_opts=run_opts,
get_raw_nnet_from_am=False,
sum_to_one_penalty=args.combine_sum_to_one_penalty,
max_objective_evaluations=args.max_objective_evaluations,
use_multitask_egs=use_multitask_egs)
else:
common_lib.force_symlink("{0}.raw".format(num_iters),
2 changes: 1 addition & 1 deletion egs/wsj/s5/steps/nnet3/train_raw_rnn.py
@@ -475,7 +475,7 @@ def train(args, run_opts):
run_opts=run_opts, chunk_width=args.chunk_width,
get_raw_nnet_from_am=False,
compute_per_dim_accuracy=args.compute_per_dim_accuracy,
sum_to_one_penalty=args.combine_sum_to_one_penalty)
max_objective_evaluations=args.max_objective_evaluations)
else:
common_lib.force_symlink("{0}.raw".format(num_iters),
"{0}/final.raw".format(args.dir))
2 changes: 1 addition & 1 deletion egs/wsj/s5/steps/nnet3/train_rnn.py
@@ -451,7 +451,7 @@ def train(args, run_opts):
run_opts=run_opts,
minibatch_size_str=args.num_chunk_per_minibatch,
chunk_width=args.chunk_width,
sum_to_one_penalty=args.combine_sum_to_one_penalty,
max_objective_evaluations=args.max_objective_evaluations,
compute_per_dim_accuracy=args.compute_per_dim_accuracy)

if args.stage <= num_iters + 1:
132 changes: 109 additions & 23 deletions src/chainbin/nnet3-chain-combine.cc
@@ -1,6 +1,7 @@
// chainbin/nnet3-chain-combine.cc

// Copyright 2012-2015 Johns Hopkins University (author: Daniel Povey)
// 2017 Yiming Wang

// See ../../COPYING for clarification regarding multiple authors
//
@@ -19,7 +20,65 @@

#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "nnet3/nnet-chain-combine.h"
#include "nnet3/nnet-utils.h"
#include "nnet3/nnet-compute.h"
#include "nnet3/nnet-chain-diagnostics.h"


namespace kaldi {
namespace nnet3 {

// Computes and returns the objective function for the examples in 'egs' given
// the model in 'nnet'. If either of batchnorm/dropout test modes is true, we
// make a copy of 'nnet', set test modes on that and evaluate its objective.
// Note: the object that prob_computer->nnet_ refers to should be 'nnet'.
double ComputeObjf(bool batchnorm_test_mode, bool dropout_test_mode,
const std::vector<NnetChainExample> &egs, const Nnet &nnet,
const chain::ChainTrainingOptions &chain_config,
const fst::StdVectorFst &den_fst,
NnetChainComputeProb *prob_computer) {
if (batchnorm_test_mode || dropout_test_mode) {
Nnet nnet_copy(nnet);
if (batchnorm_test_mode)
SetBatchnormTestMode(true, &nnet_copy);
if (dropout_test_mode)
SetDropoutTestMode(true, &nnet_copy);
NnetComputeProbOptions compute_prob_opts;
NnetChainComputeProb prob_computer_test(compute_prob_opts, chain_config,
den_fst, nnet_copy);
return ComputeObjf(false, false, egs, nnet_copy,
chain_config, den_fst, &prob_computer_test);
} else {
prob_computer->Reset();
std::vector<NnetChainExample>::const_iterator iter = egs.begin(),
end = egs.end();
for (; iter != end; ++iter)
prob_computer->Compute(*iter);
const ChainObjectiveInfo *objf_info =
prob_computer->GetObjective("output");
if (objf_info == NULL)
KALDI_ERR << "Error getting objective info (unsuitable egs?)";
KALDI_ASSERT(objf_info->tot_weight > 0.0);
// An inf or NaN tot_objf is mapped to a -infinity objective (the
// self-comparison below is a portable NaN/inf check).
double tot_objf = objf_info->tot_like + objf_info->tot_l2_term;
if (!(tot_objf == tot_objf && tot_objf - tot_objf == 0))
return -std::numeric_limits<double>::infinity();
// we prefer to deal with normalized objective functions.
return tot_objf / objf_info->tot_weight;
}
}
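The guard on tot_objf above uses self-comparison to detect NaN and infinity without compiler-specific builtins. A minimal Python sketch of the same normalization logic (the function and variable names here are illustrative, not part of Kaldi):

    import math

    def normalized_objf(tot_objf, tot_weight):
        # Mirrors the guard in ComputeObjf: a NaN or infinite total objective
        # is mapped to -infinity so it can never win the model-selection loop.
        if not math.isfinite(tot_objf):
            return float("-inf")
        return tot_objf / tot_weight  # we prefer normalized objectives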

// Updates the moving average over num_models nnets, given the average over
// the previous (num_models - 1) nnets and the new nnet.
void UpdateNnetMovingAverage(int32 num_models,
const Nnet &nnet, Nnet *moving_average_nnet) {
KALDI_ASSERT(NumParameters(nnet) == NumParameters(*moving_average_nnet));
ScaleNnet((num_models - 1.0) / num_models, moving_average_nnet);
AddNnet(nnet, 1.0 / num_models, moving_average_nnet);
}

}
}
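UpdateNnetMovingAverage keeps a running mean of the model parameters: after seeing n models, the old average is scaled by (n-1)/n and the new model is added with weight 1/n. A NumPy sketch of the same recurrence, under the simplifying assumption that a model is just a flat parameter vector:

    import numpy as np

    def update_moving_average(num_models, params, avg_params):
        # Same recurrence as UpdateNnetMovingAverage: ScaleNnet((n-1)/n, avg)
        # followed by AddNnet(new, 1/n, avg) keeps avg equal to the mean of
        # all num_models parameter vectors seen so far.
        avg_params *= (num_models - 1.0) / num_models
        avg_params += params / num_models

    models = [np.random.randn(5) for _ in range(4)]
    avg = models[0].copy()
    for n, params in enumerate(models[1:], start=2):
        update_moving_average(n, params, avg)
    assert np.allclose(avg, np.mean(models, axis=0))  # running mean == batch mean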


int main(int argc, char *argv[]) {
@@ -30,9 +89,11 @@ int main(int argc, char *argv[]) {
typedef kaldi::int64 int64;

const char *usage =
"Using a subset of training or held-out nnet3+chain examples, compute an\n"
"optimal combination of anumber of nnet3 neural nets by maximizing the\n"
"'chain' objective function. See documentation of options for more details.\n"
"Using a subset of training or held-out nnet3+chain examples, compute\n"
"the average over the first n nnet models where we maximize the\n"
"'chain' objective function for n. Note that the order of models has\n"
"been reversed before feeding into this binary. So we are actually\n"
"combining last n models.\n"
"Inputs and outputs are nnet3 raw nnets.\n"
"\n"
"Usage: nnet3-chain-combine [options] <den-fst> <raw-nnet-in1> <raw-nnet-in2> ... <raw-nnet-inN> <chain-examples-in> <raw-nnet-out>\n"
Expand All @@ -41,23 +102,28 @@ int main(int argc, char *argv[]) {
" nnet3-combine den.fst 35.raw 36.raw 37.raw 38.raw ark:valid.cegs final.raw\n";

bool binary_write = true;
int32 max_objective_evaluations = 30;
bool batchnorm_test_mode = false,
dropout_test_mode = true;
std::string use_gpu = "yes";
NnetCombineConfig combine_config;
chain::ChainTrainingOptions chain_config;

ParseOptions po(usage);
po.Register("binary", &binary_write, "Write output in binary mode");
po.Register("max-objective-evaluations", &max_objective_evaluations, "The "
"maximum number of objective evaluations in order to figure "
"out the best number of models to combine. It helps to speedup "
"if the number of models provided to this binary is quite "
"large (e.g. several hundred).");
po.Register("use-gpu", &use_gpu,
"yes|no|optional|wait, only has effect if compiled with CUDA");
po.Register("batchnorm-test-mode", &batchnorm_test_mode,
"If true, set test-mode to true on any BatchNormComponents.");
"If true, set test-mode to true on any BatchNormComponents "
"while evaluating objectives.");
po.Register("dropout-test-mode", &dropout_test_mode,
"If true, set test-mode to true on any DropoutComponents and "
"DropoutMaskComponents.");
"DropoutMaskComponents while evaluating objectives.");

combine_config.Register(&po);
chain_config.Register(&po);

po.Read(argc, argv);
@@ -83,11 +149,10 @@ int main(int argc, char *argv[]) {

Nnet nnet;
ReadKaldiObject(raw_nnet_rxfilename, &nnet);

if (batchnorm_test_mode)
SetBatchnormTestMode(true, &nnet);
if (dropout_test_mode)
SetDropoutTestMode(true, &nnet);
Nnet moving_average_nnet(nnet), best_nnet(nnet);
NnetComputeProbOptions compute_prob_opts;
NnetChainComputeProb prob_computer(compute_prob_opts, chain_config,
den_fst, moving_average_nnet);

std::vector<NnetChainExample> egs;
egs.reserve(10000); // reserve a lot of space to minimize the chance of
@@ -102,29 +167,50 @@
KALDI_ASSERT(!egs.empty());
}

// first evaluates the objective using the last model.
int32 best_num_to_combine = 1;
double
init_objf = ComputeObjf(batchnorm_test_mode, dropout_test_mode,
egs, moving_average_nnet, chain_config, den_fst, &prob_computer),
best_objf = init_objf;
KALDI_LOG << "objective function using the last model is " << init_objf;

int32 num_nnets = po.NumArgs() - 3;
NnetChainCombiner combiner(combine_config, chain_config,
num_nnets, egs, den_fst, nnet);

// then each time before we re-evaluate the objective function, we will add
// num_to_add models to the moving average.
int32 num_to_add = (num_nnets + max_objective_evaluations - 1) /
max_objective_evaluations;
for (int32 n = 1; n < num_nnets; n++) {
std::string this_nnet_rxfilename = po.GetArg(n + 2);
ReadKaldiObject(this_nnet_rxfilename, &nnet);
combiner.AcceptNnet(nnet);
// updates the moving average
UpdateNnetMovingAverage(n + 1, nnet, &moving_average_nnet);
// evaluate the objective every time we have added num_to_add models (or
// all of the models) to the moving average.
if ((n - 1) % num_to_add == num_to_add - 1 || n == num_nnets - 1) {
double objf = ComputeObjf(batchnorm_test_mode, dropout_test_mode,
egs, moving_average_nnet, chain_config, den_fst, &prob_computer);
KALDI_LOG << "Combining last " << n + 1
<< " models, objective function is " << objf;
if (objf > best_objf) {
best_objf = objf;
best_nnet = moving_average_nnet;
best_num_to_combine = n + 1;
}
}
}
KALDI_LOG << "Combining " << best_num_to_combine
<< " nnets, objective function changed from " << init_objf
<< " to " << best_objf;

combiner.Combine();

nnet = combiner.GetNnet();
if (HasBatchnorm(nnet))
RecomputeStats(egs, chain_config, den_fst, &nnet);
RecomputeStats(egs, chain_config, den_fst, &best_nnet);

#if HAVE_CUDA==1
CuDevice::Instantiate().PrintProfile();
#endif

WriteKaldiObject(nnet, nnet_wxfilename, binary_write);

WriteKaldiObject(best_nnet, nnet_wxfilename, binary_write);
KALDI_LOG << "Finished combining neural nets, wrote model to "
<< nnet_wxfilename;
} catch(const std::exception &e) {
4 changes: 2 additions & 2 deletions src/nnet3/Makefile
@@ -22,9 +22,9 @@ OBJFILES = nnet-common.o nnet-compile.o nnet-component-itf.o \
nnet-example.o nnet-nnet.o nnet-compile-utils.o \
nnet-utils.o nnet-compute.o nnet-test-utils.o nnet-analyze.o \
nnet-example-utils.o nnet-training.o \
nnet-diagnostics.o nnet-combine.o nnet-am-decodable-simple.o \
nnet-diagnostics.o nnet-am-decodable-simple.o \
nnet-optimize-utils.o nnet-chain-example.o \
nnet-chain-training.o nnet-chain-diagnostics.o nnet-chain-combine.o \
nnet-chain-training.o nnet-chain-diagnostics.o \
discriminative-supervision.o nnet-discriminative-example.o \
nnet-discriminative-diagnostics.o \
discriminative-training.o nnet-discriminative-training.o \
(Diffs for the remaining 5 changed files were not loaded.)