/*
Copyright (c) Facebook, Inc. and its affiliates.
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
*/
// Format this file with clang after editing:
// clang-format-8 conf/*.proto -i
syntax = "proto2";
package fairdiplomacy;
import public "conf/agents.proto";
import public "conf/common.proto";
// Launcher message defines how to launch the job. Two options are available:
// locally or on Slurm. Launcher information is expected to be a part of the
// main config.
message Launcher {
message Local { optional bool use_local = 1; }
message Slurm {
optional int32 num_gpus = 1 [ default = 0 ];
// By default, one task is started per GPU. If this flag is set, one task
// per machine is used instead.
optional bool single_task_per_node = 2 [ default = false ];
optional string partition = 3 [ default = "learnfair" ];
optional int32 hours = 4;
// Memory per GPU in GB.
optional int32 mem_per_gpu = 5 [ default = 62 ];
optional string comment = 6;
// Number of CPUs per GPU. You probably want 40 on Pascals and 10 otherwise.
optional int32 cpus_per_gpu = 7 [ default = 10 ];
// If set, will schedule job only on volta GPUs with 32GB of mem.
optional bool volta32 = 8;
// If set, will schedule the job only on Pascal GPUs.
optional bool pascal = 9;
// If set, will schedule job only on volta GPUs.
optional bool volta = 10;
}
oneof launcher {
Local local = 1;
Slurm slurm = 2;
}
}
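// A minimal prototxt sketch of this message as it might appear inside a task
// config (the values below are hypothetical, not recommendations):
//
//   launcher {
//     slurm {
//       num_gpus: 8
//       hours: 24
//       partition: "learnfair"
//     }
//   }
//
// Using `local { use_local: true }` instead would run the job locally.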
// Root config to compare agents.
message CompareAgentsTask {
// The order here is expected to match fairdiplomacy.models.consts.POWERS
enum Power {
AUSTRIA = 0;
ENGLAND = 1;
FRANCE = 2;
GERMANY = 3;
ITALY = 4;
RUSSIA = 5;
TURKEY = 6;
}
optional Agent agent_one = 2;
// Ignored if use_shared_agent.
optional Agent agent_six = 3;
optional Agent cf_agent = 4;
optional Power power_one = 5;
optional string out = 6;
optional int32 seed = 7 [ default = -1 ];
// Optional. For tests - max number of turns to run.
optional int32 max_turns = 8;
optional int32 max_year = 9 [ default = 1935 ];
// Optional. If set, agent_six is ignored, and agent_one is used to get all
// strategies. Enable share_strategy on CFR to get a speedup.
optional bool use_shared_agent = 10;
// Optional. If set, then the agents start from the last phase in the game
// unless start_phase is set.
optional string start_game = 11;
// Optional. Only applies to the case when start_game is set.
optional string start_phase = 12;
// Optional. If set, will draw after this number of years without SC ownership
// transfer (detected by dipcc).
optional int32 draw_on_stalemate_years = 13;
// Capture agent logging to the game json file
optional bool capture_logs = 14;
optional int32 num_processes = 90
[ default = 0 ]; // for data collection only!
optional int32 num_trials = 91 [ default = 0 ]; // for data collection only!
// If specified, requeue on slurm signal
optional bool use_default_requeue = 99 [ default = false ];
optional Launcher launcher = 100;
}
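// A hedged prototxt sketch of this task as it might appear in a root config
// (agent bodies are omitted since Agent is defined in conf/agents.proto; all
// values are hypothetical):
//
//   compare_agents {
//     agent_one { ... }
//     agent_six { ... }
//     power_one: FRANCE
//     seed: 0
//     max_year: 1920
//     out: "output.json"
//   }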
message NoPressDatasetParams {
// Train dataset file.
// Expected format: one game json per line, each line being
// "<path><space><json>\n", i.e. the result of: for g in
// /path/to/jsons/game_*.json ; do echo -n "$g " && cat $g ; done
optional string train_set_path = 1;
// Validation dataset file, same format as above
optional string val_set_path = 2;
// Path to file containing game metadata.
optional string metadata_path = 12;
// Dataloader procs (1 means load in the main process).
optional int32 num_dataloader_workers = 3 [ default = 80 ];
// Minimal score (num SC) at the end of the game needed to include the
// power in the training set.
// Exclude all-holds actions with >= n units from the dataset.
optional int32 exclude_n_holds = 5 [ default = -1 ];
// DEPRECATED
optional bool debug_only_opening_phase = 6 [ deprecated = true ];
// Factor used in exponential weighted average of sum of squares scores
optional float value_decay_alpha = 7 [ default = 1.0 ];
// Optional path to dir containing json files with state values.
optional string value_dir = 8;
// Optional. If specified, use this agent's orders instead of the orders in
// the game.json
optional Agent cf_agent = 9;
// Optional, only valid with cf_agent. If > 1, sample cf_agent multiple
// times, saving each as a separate row in the db.
optional uint32 n_cf_agent_samples = 10 [ default = 1 ];
// For debugging: use only the first n games of dataset, if > 0
optional int32 limit_n_games = 11 [ default = -1 ];
// cut this percentile of games based on player rating
// (only for dataset with player ratings)
optional float min_rating_percentile = 13 [ default = 0 ];
// DEPRECATED
optional string data_dir = 500 [ deprecated = true ];
}
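// A hedged sketch of how these params might appear inside a train config
// (all paths are placeholders):
//
//   dataset_params {
//     train_set_path: "/path/to/train_set.txt"
//     val_set_path: "/path/to/val_set.txt"
//     metadata_path: "/path/to/metadata.json"
//     only_with_min_final_score: 5
//     value_decay_alpha: 0.9
//   }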
message Encoder {
message Transformer {
// Number of attention heads. Must evenly divide inter_emb_size * 2.
optional int32 num_heads = 1;
// Number of intermediate channels for the feed-forward component
optional int32 ff_channels = 2;
// Number of transformer layers in the encoder
optional int32 num_blocks = 3;
// Channelwise dropout probability.
optional float dropout = 4;
// Layerwise dropout probability.
optional float layerdrop = 5 [ default = 0.0 ];
}
// Graph conv encoder, for backwards compatibility, is represented as
// this oneof being None, and its parameters are directly inline
// in TrainTask instead of being part of this oneof
oneof encoder { Transformer transformer = 1; }
}
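// A minimal sketch selecting the transformer encoder (values hypothetical;
// note that num_heads must evenly divide inter_emb_size * 2, e.g. 8 divides
// the 240 implied by the default inter_emb_size of 120):
//
//   encoder {
//     transformer {
//       num_heads: 8
//       ff_channels: 224
//       num_blocks: 10
//       dropout: 0.1
//     }
//   }
//
// Leaving the oneof unset falls back to the legacy graph conv encoder.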
message TrainTask {
// No Press dataset params
optional NoPressDatasetParams dataset_params = 45;
// Batch size per GPU.
optional int32 batch_size = 4;
// Learning rate.
optional float lr = 5;
// Learning rate decay per epoch.
optional float lr_decay = 6;
// Max gradient norm.
optional float clip_grad_norm = 7;
// Path to load/save the model.
optional string checkpoint = 8;
// Prob[teacher forcing] during training.
optional float teacher_force = 10;
// LSTM dropout pct.
optional float lstm_dropout = 11;
// Encoder dropout pct. IGNORED when using Transformer encoder.
optional float encoder_dropout = 12;
// If set, use a single process.
optional bool debug_no_mp = 14;
// Skip validation / save.
optional bool skip_validation = 15;
// Learn adjacency matrix.
// IGNORED when using Transformer encoder.
optional bool learnable_A = 16;
// Obsolete.
optional bool fill_missing_orders = 17 [ default = false ];
// Learn attention alignment matrix.
optional bool learnable_alignments = 18;
// Average across location embedding instead of using attention.
optional bool avg_embedding = 19;
// Use extremely simple one-hot attention in decoder
optional bool use_simple_alignments = 60;
// Number of GCN layers in encoder. IGNORED when using Transformer encoder.
optional int32 num_encoder_blocks = 20;
// Number of channels for intermediate encoder layers
optional int32 inter_emb_size = 55 [ default = 120 ];
// Use global average pooling layers in encoder blocks. Legacy parameter used
// only for graph encoder. IGNORED when using Transformer encoder.
optional bool use_global_pooling = 53;
// Architecture for the encoder (graph vs transformer)
optional Encoder encoder = 54;
// Max number of epochs to train
optional int32 num_epochs = 21;
// Stale. Always enabled.
optional bool write_jsonl = 22;
// Weight of value loss relative to policy loss, between 0 and 1
optional float value_loss_weight = 23;
// Scale factor for initial value decoder weights
optional float value_decoder_init_scale = 24;
// Max gradient norm in value decoder params
optional float value_decoder_clip_grad_norm = 25;
// Value head dropout pct.
optional float value_dropout = 27;
// obsolete
optional bool graph_decoder = 32 [ default = false ];
// dimension of LSTM
optional int32 lstm_size = 33 [ default = 200 ];
// number of LSTM layers
optional int32 lstm_layers = 34 [ default = 1 ];
// if true, add features to output orders in the model
optional bool featurize_output = 35 [ default = false ];
// if true, add "relational" features to output orders in the model
optional bool relfeat_output = 36 [ default = false ];
optional bool shuffle_locs = 38 [ default = false ];
optional bool featurize_prev_orders = 39 [ default = false ];
// Legacy parameter used only for graph encoder.
// IGNORED when using Transformer encoder.
optional bool residual_linear = 40 [ default = false ];
// Use a bunch of extra encoder blocks merging prev orders and board state.
// Legacy parameter used only for graph encoder.
// IGNORED when using Transformer encoder.
optional bool merged_gnn = 41 [ default = false ];
// Optional. If set to a positive value, will skip each residual layer in
// encoder with this probability. IGNORED when using Transformer encoder.
optional float encoder_layerdrop = 42 [ default = 0.0 ];
// Optional. If true, will use softmax on top of value head. Otherwise, will
// take squares and normalize.
optional bool value_softmax = 50 [ default = false ];
// Optional. If set, will stop the epoch after that many batches. For testing
// purposes.
optional int32 epoch_max_batches = 51;
optional bool separate_value_encoder = 52;
optional bool auto_mixed_precision = 57 [ default = false ];
// Pad spatial size to the nearest multiple of this. (e.g. 8 would result in
// 81 -> 88)
optional int32 pad_spatial_size_to_multiple = 58 [ default = 1 ];
// Random seed
optional int32 seed = 59 [ default = 0 ];
// If set, train model to predict all units for all powers in one sequence
optional bool all_powers = 61;
// Linear learning rate warmup for this many epochs at the start.
optional int32 warmup_epochs = 62;
// Setting either of these to false will entirely omit the parts of the model
// that are no longer necessary, and these parts will not be present in the
// checkpoint. Attempting to forward() for the omitted output for a model that
// doesn't have it will raise an error.
optional bool has_policy = 64 [ default = true ];
optional bool has_value = 65 [ default = true ];
// DEPRECATED or moved to NoPressDatasetParams
optional string data_cache = 500 [ deprecated = true ];
optional string metadata_path = 501 [ deprecated = true ];
optional float min_rating_percentile = 502 [ deprecated = true ];
optional float val_set_pct = 9 [ deprecated = true ];
optional int32 max_games = 30 [ default = -1, deprecated = true ];
// If specified, requeue on slurm signal
optional bool use_default_requeue = 900 [ default = false ];
optional Launcher launcher = 1000;
}
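// A hedged prototxt sketch of a train task (all values are hypothetical, not
// tuned recommendations):
//
//   train {
//     dataset_params { train_set_path: "/path/to/train_set.txt" }
//     batch_size: 1000
//     lr: 0.001
//     lr_decay: 0.99
//     clip_grad_norm: 0.5
//     checkpoint: "/path/to/checkpoint.pth"
//     num_epochs: 200
//     value_loss_weight: 0.5
//     encoder { transformer { num_heads: 8 num_blocks: 10 } }
//   }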
message LaunchBotTask {
// Agent cfg to play against
optional Agent agent = 1;
// Diplomacy server host
optional string host = 2;
// Diplomacy server port
optional int32 port = 3;
// Run every period (in seconds)
optional uint32 period = 4;
// Number of powers to manage on server
optional uint32 buffer_size = 5;
// If non-zero, # of model servers to launch and reuse
optional uint32 reuse_model_servers = 6;
// If specified, connect only to this game
optional string game_id = 7;
// If specified, connect only as this power
optional string power = 8;
// Reuse a single Agent object for all orders
optional bool reuse_agent = 9 [ default = true ];
// Optional. The length of time (in minutes) during which only messages will
// be sent (no orders)
optional float message_only_time = 10 [ default = 0 ];
// If specified, requeue on slurm signal
optional bool use_default_requeue = 900 [ default = false ];
optional Launcher launcher = 1000;
}
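// A hedged sketch of a launch_bot task (host and port are placeholders, and
// the agent body is omitted since Agent is defined in conf/agents.proto):
//
//   launch_bot {
//     agent { ... }
//     host: "localhost"
//     port: 8432
//     period: 5
//     reuse_agent: true
//   }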
// A dummy task to use in tests.
message TestTask {
message SubMessage { optional int32 subscalar = 1 [ default = -1 ]; }
message ComplexSubmessageWithIncludes {
repeated Include includes = 1;
optional SubMessage sub = 2;
}
enum SomeEnum {
ZERO = 0;
ONE = 1;
};
optional float scalar = 1 [ default = -1 ];
optional float scalar_no_default = 2;
optional SubMessage sub = 3;
optional SubMessage sub2 = 4;
optional SomeEnum enum_value = 5 [ default = ZERO ];
optional SomeEnum enum_value_no_default = 6;
oneof oneof_field {
int32 oneof_value1 = 7;
int32 oneof_value2 = 8;
SubMessage oneof_value3 = 9;
}
optional ComplexSubmessageWithIncludes complex_sub = 10;
map<string, int32> some_map = 11;
optional bool bool_field_no_default = 12;
optional bool bool_field_with_default_false = 13 [ default = false ];
optional bool bool_field_with_default_true = 14 [ default = true ];
optional Launcher launcher = 100;
}
message ExploitTask {
// For PG mode only model_path is used. This model is used for both policy and
// value.
//
// For AlphaDip mode both flags are used. If only model_path is given all
// losses will be applied to this model. If both are provided, value losses
// are applied to value_model_path and policy losses to model_path.
//
// By default rollout workers will use models specified in the
// search_rollout.agent.cfr1p.*model_path.
// To send trained models to rollout workers, use
// search_rollout.extra_params.use_trained_{policy,value}.
//
// Eval workers will use a trained value model iff value loss is on.
// Eval workers will use a trained policy model iff policy loss is on.
optional string model_path = 1;
optional string value_model_path = 22;
// Optional. If set, will load from this full ckpt (requeue.ckpt file in exp
// dir).
optional string requeue_ckpt_path = 25;
// Weight of critic loss in total loss.
// For AlphaDip, setting this to zero disables the value loss.
optional float critic_weight = 2 [ default = 1.0 ];
// Weight of surrogate entropy loss that should push action-level entropy up.
optional float sampled_entropy_weight = 10;
// Optional. If set, weights of the exploit agent will be randomly
// initialized.
optional bool reset_agent_weights = 8;
optional Trainer trainer = 6;
// Optional. If positive, will set random seed for torch on the main process.
optional int32 seed = 9 [ default = -1 ];
// ############### SEARCH ONLY FLAGS
// Search mode. Weight of the cross-entropy (XE) loss between the network's
// policy and the search policy.
optional float search_policy_weight = 13;
// Apply the policy loss on each batch with this probability. Set to < 1.0 to
// speed up training.
optional float search_policy_update_prob = 16 [ default = 1.0 ];
optional bool search_policy_mse = 17;
// If set, will multiply target probabilities of search policy by probability
// mass of blueprint over these actions.
optional bool search_policy_mse_bp_normalized = 19;
optional float search_policy_mse_bp_upper_bound = 20 [ default = 1.0 ];
// Optional. Disable bootstrap for RL+search when exploring.
// Must always be true. Using online targets is not supported anymore.
optional bool bootstrap_offline_targets = 15;
// Num gpus to use. Search only.
optional int32 num_train_gpus = 18 [ default = 1 ];
message SearchEvTarget {
optional float temperature = 1;
optional bool use_softmax = 2 [ default = true ];
}
oneof maybe_search_ev_loss { SearchEvTarget search_ev_loss = 21; }
// Use a faster method of multi-GPU training with processes instead of threads
optional bool use_distributed_data_parallel = 26 [ default = false ];
// ############### PG ONLY FLAGS
// Reward discounting.
optional float discounting = 7 [ default = 1.0 ];
// Weight of entropy loss in total loss.
optional float entropy_weight = 3 [ default = 0.0 ];
message Optimization {
// Deprecated. Use LR within optimizer.
optional float lr = 1;
// Optional (but highly recommended). Gradient clipping.
optional float grad_clip = 2;
// Optional. Warmup LR from zero to normal linearly.
optional int32 warmup_epochs = 3;
// Optional. Decay LR to 0 by this number of epochs using a cosine schedule.
// Cannot be less than the number of epochs.
optional int32 cosine_decay_epochs = 6;
// Optional. Multiply LR by step_decay_factor every step_decay_epochs epochs.
optional int32 step_decay_epochs = 7;
optional float step_decay_factor = 8;
message AdamOptimizer { optional float lr = 1; }
message SgdOptimizer {
optional float lr = 1;
optional float momentum = 2;
optional float weight_decay = 3;
}
oneof optimizer {
AdamOptimizer adam = 4;
SgdOptimizer sgd = 5;
};
}
optional Optimization optimizer = 4;
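// A hedged sketch of an Optimization block choosing the SGD optimizer
// (values hypothetical):
//
//   optimizer {
//     grad_clip: 0.5
//     warmup_epochs: 10
//     sgd { lr: 0.01 momentum: 0.9 weight_decay: 0.0001 }
//   }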
// Only for AlphaDip.
oneof maybe_value_optimizer { Optimization value_optimizer = 200; }
message Rollout {
// Required. Max number of steps to do in the rollout.
optional int32 rollout_max_length = 1;
// Optional. How many parallel games to batch within single rollout.
optional int32 rollout_batch_size = 2 [ default = 1 ];
// Optional. How many rollout processes to run. If zero or negative, will
// run rollouts in the main process.
optional int32 num_rollout_processes = 4 [ default = 1 ];
// Optional. If > 0, will save games with this stride.
optional int32 dump_games_every = 5 [ default = 0 ];
// Optional. Max batch size in postman inference processes.
optional int32 inference_batch_size = 6;
// Optional. Wait at least this number of seconds before loading a new model
// in the inference worker. By default, checks before every forward.
optional int32 inference_ckpt_sync_every = 14;
// Required. The size of the produced batches, i.e., what the training loop
// will receive.
optional int32 batch_size = 7;
// Optional. How much adjacent batches overlap. Note that the default value
// (1) means that each action frame will be used exactly once, as the last
// item in a batch is removed in IMPALA.
optional int32 batch_interleave_size = 8 [ default = 1 ];
// Optional. If set, the batches will concatenate rollouts until batch_size
// is reached, instead of following it exactly.
optional bool do_not_split_rollouts = 9;
optional bool single_rollout_gpu = 11;
optional int32 server_procs_per_gpu = 12 [ default = 1 ];
message Reward {
// Required. Name of the score metric from
// fairdiplomacy.utils.game_scoring.
optional string score_name = 1;
// Optional. Penalty for each move to encourage shorter games.
optional float delay_penalty = 2;
// Optional. If set, then the reward will be a difference between the
// score before the action and after the action.
optional bool differential_reward = 3;
// Optional. Hacky way to hardcode alliances.
// 0 -> no alliances
// 1 -> FRA, ENG, GER vs all.
// 2 -> FRA, ENG, GER, IT vs all.
// 3 -> FRA, ENG, RUS vs all.
// 4 -> FRA vs all.
optional int32 alliance_type = 4;
}
// Required. How to compute rewards.
optional Reward reward = 10;
// Optional. Whether to do selfplay instead of exploitability.
optional bool selfplay = 13;
// Required in selfplay. Number of rollout processes doing eval rollouts
// against the supervised model. These rollouts are ignored for training.
// These workers are subtracted from num_rollout_processes.
optional int32 num_eval_rollout_processes = 15;
// Required. Temperature for the opponent agent.
optional float blueprint_temperature = 16;
// Optional. If set, will override global model_path.
optional string blueprint_model_path = 20;
// Optional. If set, will stop the rollout once the exploit agent(s) is out.
optional bool fast_finish = 17;
// Optional. If provided, rollouts and evals will start from these games.
// The file is expected to contain paths to game.jsons, one per line.
optional string initial_games_index_file = 18;
// Optional. If set, will use only this number of cores. The last cores are
// reserved, assuming the machine has 80 cores.
optional int32 num_cores_to_reserve = 19;
}
optional Rollout rollout = 5;
message SearchRollout {
// Required. Params of the CFR agent. To simplify includes, an arbitrary
// agent is allowed here, but the code will only accept a CFR1P agent.
optional Agent agent = 1;
// // Required. Max number of steps to do in the rollout.
// optional int32 rollout_max_length = 2;
// Optional. How many rollout processes to run per GPU. If zero or
// negative, will run a single rollout process.
optional int32 num_workers_per_gpu = 4 [ default = 1 ];
// Required. How many consecutive phases are batched together. This affects
// the batch (num phases in batch = chunk_length * batch_size) and how
// frequently workers send updates to the trainer (once chunk_length phases
// are collected).
optional int32 chunk_length = 6;
// Required. The size of the produced batches. This is multiplied by
// chunk_length, see above.
optional int32 batch_size = 7;
// Optional. If set, the batches will concatenate rollouts until batch_size
// is reached, instead of following it exactly.
optional bool do_not_split_rollouts = 9;
optional int32 server_procs_per_gpu = 12 [ default = 1 ];
// Required in selfplay. Number of rollout processes doing eval rollouts
// against the supervised model. These rollouts are ignored for training.
// These workers are subtracted from num_rollout_processes.
// optional int32 num_eval_rollout_processes = 15;
// Optional. If provided, rollouts and evals will start from these games.
// The file is expected to contain paths to game.jsons, one per line.
optional string initial_games_index_file = 18;
// Optional. If set, will use only this number of cores. The last cores are
// reserved, assuming the machine has 80 cores.
optional int32 num_cores_to_reserve = 19;
// Optional. Verbosity of logging in rollout processes.
// 0 - no logging
// 1 - process 0 writes INFO and everyone else writes WARNINGS
// 2 - everyone writes INFO
optional int32 verbosity = 20 [ default = 1 ];
message ExtraRolloutParams {
// Optional. Whether to share the same CFR strategy for everyone.
optional bool single_cfr = 1 [ default = false ];
// Optional. If positive, will sample from all plausible actions instead of
// doing CFR, with this probability.
optional float explore_eps = 2;
// Optional. Custom explore constants for first 2 phases in the game.
optional float explore_s1901m_eps = 14;
optional float explore_f1901m_eps = 15;
// Optional. Put each rollout into the queue this many times. For debugging.
optional int32 fake_gen = 3 [ default = 1 ];
// Optional. If set, will decide whether to explore for each agent
// independently.
optional bool independent_explore = 4;
// Optional. If set, will choose the explore step first and then an agent
// that does not deviate at this step; all other agents deviate.
optional bool explore_all_but_one = 9;
// Optional. If set, will not play after this year.
optional int32 max_year = 5;
// Optional. If set, will set max_year on rollout workers randomly from
// [1902, max_year].
optional bool randomize_max_year = 6;
// Optional. If set, will reduce the number of plausible orders in the agent
// until this epoch.
optional int32 grow_action_space_till = 7;
// Optional. Simplex-discounting factor.
optional float discounting = 8 [ default = 1.0 ];
// Optional. If set, will use CFR-based EVs of the next state as targets.
optional bool use_ev_targets = 19;
// Optional. If set, will collect CFR policies and send to the trainer.
// This flag should be set programmatically by other flags.
optional bool collect_search_policies = 10;
// Optional. If set, will collect CFR policies and send to the trainer.
// This flag should be set programmatically by other flags.
optional bool collect_search_evs = 18;
// Optional. If set, will use the model under training as a policy, i.e.,
// to generate plausible actions.
optional bool use_trained_policy = 11;
// Optional. If set, will use the model under training for value
// estimation.
optional bool use_trained_value = 21 [ default = true ];
// UNUSED
optional bool fork_explore = 12;
// Optional. If set, use completely random policy (uniform sampling) for
// plausible orders.
optional bool random_policy = 13;
// Optional. Run DoubleOracle in this fraction of games. Only valid if
// `do` is provided.
optional float run_do_prob = 17 [ default = 1.0 ];
oneof maybe_do { DoubleOracleExploration do = 16; }
}
// Will be passed to `yield_rollouts`.
optional ExtraRolloutParams extra_params = 21;
// If set, no training is done, only data generation is benchmarked.
optional bool benchmark_only = 22;
message Buffer {
// Required. Buffer size in chunks.
optional int32 capacity = 1;
optional int32 prefetch = 4;
// Optional. If set, will pre-load the buffer from this path on startup.
optional string load_path = 2;
// Optional. If set, will save the buffer after this many examples are
// added. Ignored if load_path is used.
optional int32 save_at = 3;
// Optional. If set, will shuffle elements within chunks.
optional bool shuffle = 5;
}
// Optional. If set, will throttle training if the ratio of trained samples
// to generated samples is above this value.
optional float enforce_train_gen_ratio = 25 [ default = -1 ];
optional Buffer buffer = 23;
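// A hedged sketch of a Buffer block (values hypothetical):
//
//   buffer {
//     capacity: 1000
//     prefetch: 2
//     shuffle: true
//   }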
message TestSituationEval { optional bool do_eval = 1; }
optional TestSituationEval test_situation_eval = 24;
message H2HEval {
optional Agent agent_six = 1;
optional string tag = 2;
// Optional. If provided, redefines global initial_games_index_file.
optional string initial_games_index_file = 3;
// These flags will be set automatically.
// use_trained_{policy,value} is set to true iff we apply loss on
// {policy,value}. The trainer knows which losses are used and will set
// these variables correspondingly.
optional bool use_trained_policy = 4;
optional bool use_trained_value = 5;
}
// Optional. Run h2h eval against 6 other agents.
optional H2HEval h2h_eval_0 = 26;
optional H2HEval h2h_eval_1 = 29;
// Optional. If positive, automatically draw if no center ownership changes
// in this number of years.
optional int32 draw_on_stalemate_years = 27 [ default = -1 ];
optional int32 warmup_batches = 28 [ default = 1 ];
}
// Rollout params for ReSearch mode.
optional SearchRollout search_rollout = 11;
message Trainer {
// Optional. By default = infinite.
optional int32 max_epochs = 1;
// Required. Number of updates per epoch.
optional int32 epoch_size = 2;
// Optional. Save checkpoint every so many epochs.
optional int32 save_checkpoint_every = 3;
// Optional. Communicate current ckpt to inference workers every so many
// iterations.
optional int32 save_sync_checkpoint_every = 4 [ default = 1 ];
// Optional. Debugging option. Stop updating model after this number of
// updates.
optional int32 max_updates = 5 [ default = 0 ];
// Optional. If set, will train the model while it is in eval mode.
optional bool train_as_eval = 6;
optional bool train_encoder_as_eval = 7;
optional bool train_decoder_as_eval = 8;
// Run everything in eval mode except for batch norm modules. Essentially,
// it puts only dropout into eval mode.
optional bool train_as_eval_but_batchnorm = 9;
}
// Arbitrary comment. Can be used to make a "re" run for the same config
// with changed code.
optional string comment = 999;
optional Launcher launcher = 1000;
}
message BuildDbCacheTask {
// Dataset Params
optional NoPressDatasetParams dataset_params = 1;
// Required. Glob pattern to game.json files
optional string glob = 2;
// Required. Path to save db cache
optional string out_path = 3;
// Optional. If specified, use this agent's orders instead of the orders in
// the game.json
optional Agent cf_agent = 4;
// Optional, only valid with cf_agent. If > 1, sample cf_agent multiple
// times, saving each as a separate row in the db.
optional uint32 n_cf_agent_samples = 5 [ default = 1 ];
// Percentage of games to use as val set.
optional float val_set_pct = 6 [ default = 0.01 ];
optional Launcher launcher = 100;
}
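// A hedged sketch of a build_db_cache task (all paths are placeholders):
//
//   build_db_cache {
//     dataset_params { metadata_path: "/path/to/metadata.json" }
//     glob: "/path/to/jsons/game_*.json"
//     out_path: "/path/to/cache.db"
//     val_set_pct: 0.01
//   }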
// The root config. Every top-level prototxt must be a message of this type.
// User's code will receive a specific task config after all includes and
// redefines are resolved.
message MetaCfg {
repeated Include includes = 1;
oneof task {
CompareAgentsTask compare_agents = 101;
TrainTask train = 102;
LaunchBotTask launch_bot = 103;
ExploitTask exploit = 104;
BuildDbCacheTask build_db_cache = 105;
// Dummy task to test heyhi.
TestTask test = 999;
}
}
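// A hedged end-to-end sketch of a root prototxt (the Include body is omitted
// since Include is defined in conf/common.proto; values are hypothetical):
//
//   includes { ... }
//   compare_agents {
//     agent_one { ... }
//     agent_six { ... }
//     power_one: AUSTRIA
//     seed: 0
//   }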