/*
Copyright (c) Facebook, Inc. and its affiliates.
This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
*/
// Format this file with clang after editing:
// clang-format-8 conf/*.proto -i
syntax = "proto2";
package fairdiplomacy;
import public "conf/agents.proto";
import public "conf/common.proto";
// Launcher message defines how to launch the job. Two options are available:
// locally or on Slurm. Launcher information is expected to be a part of the
// main config.
message Launcher {
message Local { optional bool use_local = 1; }
message Slurm {
optional int32 num_gpus = 1 [ default = 0 ];
// By default, one task is started per GPU. If this flag is set, one task
// per machine is used instead.
optional bool single_task_per_node = 2 [ default = false ];
optional string partition = 3 [ default = "learnfair" ];
optional int32 hours = 4;
// Memory per GPU in GB.
optional int32 mem_per_gpu = 5 [ default = 62 ];
optional string comment = 6;
// Number of CPUs per GPU. You probably want 40 on Pascals and 10 otherwise.
optional int32 cpus_per_gpu = 7 [ default = 10 ];
// If set, will schedule job only on volta GPUs with 32GB of mem.
optional bool volta32 = 8;
// If set, will schedule the job only on Pascal GPUs.
optional bool pascal = 9;
// If set, will schedule job only on volta GPUs.
optional bool volta = 10;
}
oneof launcher {
Local local = 1;
Slurm slurm = 2;
}
}
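// A minimal prototxt sketch of this message as it might appear inside a task
// config (the values below are hypothetical, not recommendations):
//
//   launcher {
//     slurm {
//       num_gpus: 8
//       hours: 24
//       partition: "learnfair"
//     }
//   }
//
// Using `local { use_local: true }` instead would run the job locally.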
// Root config to compare agents.
message CompareAgentsTask {
// The order here is expected to match fairdiplomacy.models.consts.POWERS
enum Power {
AUSTRIA = 0;
ENGLAND = 1;
FRANCE = 2;
GERMANY = 3;
ITALY = 4;
RUSSIA = 5;
TURKEY = 6;
}
optional Agent agent_one = 2;
// Ignored if use_shared_agent.
optional Agent agent_six = 3;
optional Agent cf_agent = 4;
optional Power power_one = 5;
optional string out = 6;
optional int32 seed = 7 [ default = -1 ];
// Optional. For tests - max number of turns to run.
optional int32 max_turns = 8;
optional int32 max_year = 9 [ default = 1935 ];
// Optional. If set, agent_six is ignored, and agent_one is used to get all
// strategies. Enable share_strategy on CFR to get a speedup.
optional bool use_shared_agent = 10;
// Optional. If set, then the agents start from the last phase in the game
// unless start_phase is set.
optional string start_game = 11;
// Optional. Only applies to the case when start_game is set.
optional string start_phase = 12;
// Optional. If set, will draw after this number of years without SC ownership
// transfer (detected by dipcc).
optional int32 draw_on_stalemate_years = 13;
// Capture agent logging to the game json file
optional bool capture_logs = 14;
optional int32 num_processes = 90
[ default = 0 ]; // for data collection only!
optional int32 num_trials = 91 [ default = 0 ]; // for data collection only!
// If specified, requeue on slurm signal
optional bool use_default_requeue = 99 [ default = false ];
optional Launcher launcher = 100;
}
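// A hedged prototxt sketch of this task as it might appear in a root config
// (agent bodies are omitted since Agent is defined in conf/agents.proto; all
// values are hypothetical):
//
//   compare_agents {
//     agent_one { ... }
//     agent_six { ... }
//     power_one: FRANCE
//     seed: 0
//     max_year: 1920
//     out: "output.json"
//   }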
message NoPressDatasetParams {
// Train dataset file.
// Expected format: one game json per line, each line being
// "<path><space><json>\n", i.e. the result of: for g in
// /path/to/jsons/game_*.json ; do echo -n "$g " && cat $g ; done
optional string train_set_path = 1;
// Validation dataset file, same format as above
optional string val_set_path = 2;
// Path to file containing game metadata.
optional string metadata_path = 12;
// Dataloader procs (1 means load in the main process).
optional int32 num_dataloader_workers = 3 [ default = 80 ];
// Minimal score (num SC) at the end of the game needed to include the
// power in the training set.
// Exclude all-holds actions with >= n units from the dataset.
optional int32 exclude_n_holds = 5 [ default = -1 ];
// DEPRECATED
optional bool debug_only_opening_phase = 6 [ deprecated = true ];
// Factor used in exponential weighted average of sum of squares scores
optional float value_decay_alpha = 7 [ default = 1.0 ];
// Optional path to dir containing json files with state values.
optional string value_dir = 8;
// Optional. If specified, use this agent's orders instead of the orders in
// the game.json
optional Agent cf_agent = 9;
// Optional, only valid with cf_agent. If > 1, sample cf_agent multiple
// times, saving each as a separate row in the db.
optional uint32 n_cf_agent_samples = 10 [ default = 1 ];
// For debugging: use only the first n games of dataset, if > 0
optional int32 limit_n_games = 11 [ default = -1 ];
// cut this percentile of games based on player rating
// (only for dataset with player ratings)
optional float min_rating_percentile = 13 [ default = 0 ];
// DEPRECATED
optional string data_dir = 500 [ deprecated = true ];
}
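// A hedged sketch of how these params might appear inside a train config
// (all paths are placeholders):
//
//   dataset_params {
//     train_set_path: "/path/to/train_set.txt"
//     val_set_path: "/path/to/val_set.txt"
//     metadata_path: "/path/to/metadata.json"
//     only_with_min_final_score: 5
//     value_decay_alpha: 0.9
//   }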
message Encoder {
message Transformer {
// Number of attention heads. Must evenly divide inter_emb_size * 2.
optional int32 num_heads = 1;
// Number of intermediate channels for the feed-forward component
optional int32 ff_channels = 2;
// Number of transformer layers in the encoder
optional int32 num_blocks = 3;
// Channelwise dropout probability.
optional float dropout = 4;
// Layerwise dropout probability.
optional float layerdrop = 5 [ default = 0.0 ];
}
// Graph conv encoder, for backwards compatibility, is represented as
// this oneof being None, and its parameters are directly inline
// in TrainTask instead of being part of this oneof
oneof encoder { Transformer transformer = 1; }
}
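// A minimal sketch selecting the transformer encoder (values hypothetical;
// note that num_heads must evenly divide inter_emb_size * 2, e.g. 8 divides
// the 240 implied by the default inter_emb_size of 120):
//
//   encoder {
//     transformer {
//       num_heads: 8
//       ff_channels: 224
//       num_blocks: 10
//       dropout: 0.1
//     }
//   }
//
// Leaving the oneof unset falls back to the legacy graph conv encoder.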
message TrainTask {
// No Press dataset params
optional NoPressDatasetParams dataset_params = 45;
// Batch size per GPU.
optional int32 batch_size = 4;
// Learning rate.
optional float lr = 5;
// Learning rate decay per epoch.
optional float lr_decay = 6;
// Max gradient norm.
optional float clip_grad_norm = 7;
// Path to load/save the model.
optional string checkpoint = 8;
// Prob[teacher forcing] during training.
optional float teacher_force = 10;
// LSTM dropout pct.
optional float lstm_dropout = 11;
// Encoder dropout pct. IGNORED when using Transformer encoder.
optional float encoder_dropout = 12;
// If set, use a single process.
optional bool debug_no_mp = 14;
// Skip validation / save.
optional bool skip_validation = 15;
// Learn adjacency matrix.
// IGNORED when using Transformer encoder.
optional bool learnable_A = 16;
// Obsolete.
optional bool fill_missing_orders = 17 [ default = false ];
// Learn attention alignment matrix.
optional bool learnable_alignments = 18;
// Average across location embedding instead of using attention.
optional bool avg_embedding = 19;
// Use extremely simple one-hot attention in decoder
optional bool use_simple_alignments = 60;
// Number of GCN layers in encoder. IGNORED when using Transformer encoder.
optional int32 num_encoder_blocks = 20;
// Number of channels for intermediate encoder layers
optional int32 inter_emb_size = 55 [ default = 120 ];
// Use global average pooling layers in encoder blocks. Legacy parameter used
// only for graph encoder. IGNORED when using Transformer encoder.
optional bool use_global_pooling = 53;
// Architecture for the encoder (graph vs transformer)
optional Encoder encoder = 54;
// Max number of epochs to train
optional int32 num_epochs = 21;
// Stale. Always enabled.
optional bool write_jsonl = 22;
// Weight of value loss relative to policy loss, between 0 and 1
optional float value_loss_weight = 23;
// Scale factor for initial value decoder weights
optional float value_decoder_init_scale = 24;
// Max gradient norm in value decoder params
optional float value_decoder_clip_grad_norm = 25;
// Value head dropout pct.
optional float value_dropout = 27;
// obsolete
optional bool graph_decoder = 32 [ default = false ];
// dimension of LSTM
optional int32 lstm_size = 33 [ default = 200 ];
// number of LSTM layers
optional int32 lstm_layers = 34 [ default = 1 ];
// if true, add features to output orders in the model
optional bool featurize_output = 35 [ default = false ];
// if true, add "relational" features to output orders in the model
optional bool relfeat_output = 36 [ default = false ];
optional bool shuffle_locs = 38 [ default = false ];
optional bool featurize_prev_orders = 39 [ default = false ];
// Legacy parameter used only for graph encoder.
// IGNORED when using Transformer encoder.
optional bool residual_linear = 40 [ default = false ];
// Use a bunch of extra encoder blocks merging prev orders and board state.
// Legacy parameter used only for graph encoder.
// IGNORED when using Transformer encoder.
optional bool merged_gnn = 41 [ default = false ];
// Optional. If set to a positive value, will skip each residual layer in
// encoder with this probability. IGNORED when using Transformer encoder.
optional float encoder_layerdrop = 42 [ default = 0.0 ];
// Optional. If true, will use softmax on top of value head. Otherwise, will
// take squares and normalize.
optional bool value_softmax = 50 [ default = false ];
// Optional. If set, will stop the epoch after that many batches. For testing
// purposes.
optional int32 epoch_max_batches = 51;
optional bool separate_value_encoder = 52;
optional bool auto_mixed_precision = 57 [ default = false ];
// Pad spatial size to the nearest multiple of this. (e.g. 8 would result in
// 81 -> 88)
optional int32 pad_spatial_size_to_multiple = 58 [ default = 1 ];
// Random seed
optional int32 seed = 59 [ default = 0 ];
// If set, train model to predict all units for all powers in one sequence
optional bool all_powers = 61;
// Linear learning rate warmup for this many epochs at the start.
optional int32 warmup_epochs = 62;
// Setting either of these to false will entirely omit the parts of the model
// that are no longer necessary, and these parts will not be present in the
// checkpoint. Attempting to forward() for the omitted output for a model that
// doesn't have it will raise an error.
optional bool has_policy = 64 [ default = true ];
optional bool has_value = 65 [ default = true ];
// DEPRECATED or moved to NoPressDatasetParams
optional string data_cache = 500 [ deprecated = true ];
optional string metadata_path = 501 [ deprecated = true ];
optional float min_rating_percentile = 502 [ deprecated = true ];
optional float val_set_pct = 9 [ deprecated = true ];
optional int32 max_games = 30 [ default = -1, deprecated = true ];
// If specified, requeue on slurm signal
optional bool use_default_requeue = 900 [ default = false ];
optional Launcher launcher = 1000;
}
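// A hedged prototxt sketch of a train task (all values are hypothetical, not
// tuned recommendations):
//
//   train {
//     dataset_params { train_set_path: "/path/to/train_set.txt" }
//     batch_size: 1000
//     lr: 0.001
//     lr_decay: 0.99
//     clip_grad_norm: 0.5
//     checkpoint: "/path/to/checkpoint.pth"
//     num_epochs: 200
//     value_loss_weight: 0.5
//     encoder { transformer { num_heads: 8 num_blocks: 10 } }
//   }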
message LaunchBotTask {
// Agent cfg to play against
optional Agent agent = 1;
// Diplomacy server host
optional string host = 2;
// Diplomacy server port
optional int32 port = 3;
// Run every period (in seconds)
optional uint32 period = 4;
// Number of powers to manage on server
optional uint32 buffer_size = 5;
// If non-zero, # of model servers to launch and reuse
optional uint32 reuse_model_servers = 6;
// If specified, connect only to this game
optional string game_id = 7;
// If specified, connect only as this power
optional string power = 8;
// Reuse a single Agent object for all orders
optional bool reuse_agent = 9 [ default = true ];
// Optional. The length of time (in minutes) during which only messages will
// be sent (no orders)
optional float message_only_time = 10 [ default = 0 ];
// If specified, requeue on slurm signal
optional bool use_default_requeue = 900 [ default = false ];
optional Launcher launcher = 1000;
}
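// A hedged sketch of a launch_bot task (host and port are placeholders, and
// the agent body is omitted since Agent is defined in conf/agents.proto):
//
//   launch_bot {
//     agent { ... }
//     host: "localhost"
//     port: 8432
//     period: 5
//     reuse_agent: true
//   }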
// A dummy task to use in tests.
message TestTask {
message SubMessage { optional int32 subscalar = 1 [ default = -1 ]; }
message ComplexSubmessageWithIncludes {
repeated Include includes = 1;
optional SubMessage sub = 2;
}
enum SomeEnum {
ZERO = 0;
ONE = 1;
};
optional float scalar = 1 [ default = -1 ];
optional float scalar_no_default = 2;
optional SubMessage sub = 3;
optional SubMessage sub2 = 4;
optional SomeEnum enum_value = 5 [ default = ZERO ];
optional SomeEnum enum_value_no_default = 6;
oneof oneof_field {
int32 oneof_value1 = 7;
int32 oneof_value2 = 8;
SubMessage oneof_value3 = 9;
}
optional ComplexSubmessageWithIncludes complex_sub = 10;
map<string, int32> some_map = 11;
optional bool bool_field_no_default = 12;
optional bool bool_field_with_default_false = 13 [ default = false ];
optional bool bool_field_with_default_true = 14 [ default = true ];
optional Launcher launcher = 100;
}
message ExploitTask {
// For PG mode only model_path is used. This model is used for both policy and
// value.
//
// For AlphaDip mode both flags are used. If only model_path is given all
// losses will be applied to this model. If both are provided, value losses
// are applied to value_model_path and policy losses to model_path.
//
// By default rollout workers will use models specified in the
// search_rollout.agent.cfr1p.*model_path.
// To send trained models to rollout workers, use
// search_rollout.extra_params.use_trained_{policy,value}.
//
// Eval workers will use a trained value model iff value loss is on.
// Eval workers will use a trained policy model iff policy loss is on.
optional string model_path = 1;
optional string value_model_path = 22;
// Optional. If set, will load from this full ckpt (requeue.ckpt file in exp
// dir).
optional string requeue_ckpt_path = 25;
// Weight of critic loss in total loss.
// For AlphaDip, setting this to zero disables the value loss.
optional float critic_weight = 2 [ default = 1.0 ];
// Weight of surrogate entropy loss that should push action-level entropy up.
optional float sampled_entropy_weight = 10;
// Optional. If set, weights of the exploit agent will be randomly
// initialized.
optional bool reset_agent_weights = 8;
optional Trainer trainer = 6;
// Optional. If positive, will set random seed for torch on the main process.
optional int32 seed = 9 [ default = -1 ];
// ############### SEARCH ONLY FLAGS
// Search mode. Weight of the cross-entropy (XE) loss between the network's
// policy and the search policy.
optional float search_policy_weight = 13;
// Apply the policy loss on each batch with this probability. Set to < 1.0 to
// speed up training.
optional float search_policy_update_prob = 16 [ default = 1.0 ];
optional bool search_policy_mse = 17;
// If set, will multiply target probabilities of search policy by probability
// mass of blueprint over these actions.
optional bool search_policy_mse_bp_normalized = 19;
optional float search_policy_mse_bp_upper_bound = 20 [ default = 1.0 ];
// Optional. Disable bootstrap for RL+search when exploring.
// Must always be true. Using online targets is not supported anymore.
optional bool bootstrap_offline_targets = 15;
// Num gpus to use. Search only.
optional int32 num_train_gpus = 18 [ default = 1 ];
message SearchEvTarget {
optional float temperature = 1;
optional bool use_softmax = 2 [ default = true ];
}
oneof maybe_search_ev_loss { SearchEvTarget search_ev_loss = 21; }
// Use a faster method of multi-GPU training with processes instead of threads
optional bool use_distributed_data_parallel = 26 [ default = false ];
// ############### PG ONLY FLAGS
// Reward discounting.
optional float discounting = 7 [ default = 1.0 ];
// Weight of entropy loss in total loss.
optional float entropy_weight = 3 [ default = 0.0 ];
message Optimization {
// Deprecated. Use LR within optimizer.
optional float lr = 1;
// Optional (but highly recommended). Gradient clipping.
optional float grad_clip = 2;
// Optional. Warmup LR from zero to normal linearly.
optional int32 warmup_epochs = 3;
// Optional. Decay LR to 0 by this number of epochs using a cosine schedule.
// Cannot be less than the number of epochs.
optional int32 cosine_decay_epochs = 6;
// Optional. Multiply LR by step_decay_factor every step_decay_epochs epochs.
optional int32 step_decay_epochs = 7;
optional float step_decay_factor = 8;
message AdamOptimizer { optional float lr = 1; }
message SgdOptimizer {
optional float lr = 1;
optional float momentum = 2;
optional float weight_decay = 3;
}
oneof optimizer {
AdamOptimizer adam = 4;
SgdOptimizer sgd = 5;
};
}
optional Optimization optimizer = 4;
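// A hedged sketch of an Optimization block choosing the SGD optimizer
// (values hypothetical):
//
//   optimizer {
//     grad_clip: 0.5
//     warmup_epochs: 10
//     sgd { lr: 0.01 momentum: 0.9 weight_decay: 0.0001 }
//   }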
// Only for AlphaDip.
oneof maybe_value_optimizer { Optimization value_optimizer = 200; }
message Rollout {
// Required. Max number of steps to do in the rollout.
optional int32 rollout_max_length = 1;
// Optional. How many parallel games to batch within single rollout.
optional int32 rollout_batch_size = 2 [ default = 1 ];
// Optional. How many rollout processes to run. If zero or negative, will
// run rollouts in the main process.
optional int32 num_rollout_processes = 4 [ default = 1 ];
// Optional. If > 0, will save games with this stride.
optional int32 dump_games_every = 5 [ default = 0 ];
// Optional. Max batch size in postman inference processes.
optional int32 inference_batch_size = 6;
// Optional. Wait at least this number of seconds before loading a new model
// in the inference worker. By default, checks before every forward.
optional int32 inference_ckpt_sync_every = 14;
// Required. The size of the produced batches, i.e., what the training loop
// will receive.
optional int32 batch_size = 7;
// Optional. How much adjacent batches overlap. Note that the default value
// (1) means that each action frame will be used exactly once, as the last
// item in a batch is removed in IMPALA.
optional int32 batch_interleave_size = 8 [ default = 1 ];
// Optional. If set, the batches will concatenate rollouts until batch_size
// is reached, instead of following it exactly.
optional bool do_not_split_rollouts = 9;
optional bool single_rollout_gpu = 11;
optional int32 server_procs_per_gpu = 12 [ default = 1 ];
message Reward {
// Required. Name of the score metric from
// fairdiplomacy.utils.game_scoring.
optional string score_name = 1;
// Optional. Penalty for each move to encourage shorter games.
optional float delay_penalty = 2;
// Optional. If set, then the reward will be a difference between the
// score before the action and after the action.
optional bool differential_reward = 3;
// Optional. Hacky way to hardcode alliances.
// 0 -> no alliances
// 1 -> FRA, ENG, GER vs all.
// 2 -> FRA, ENG, GER, IT vs all.
// 3 -> FRA, ENG, RUS vs all.
// 4 -> FRA vs all.
optional int32 alliance_type = 4;
}
// Required. How to compute rewards.
optional Reward reward = 10;
// Optional. Whether to do selfplay instead of exploitability.
optional bool selfplay = 13;
// Required in selfplay. Number of rollout processes doing eval rollouts
// against the supervised model. These rollouts are ignored for training.
// These workers are subtracted from num_rollout_processes.
optional int32 num_eval_rollout_processes = 15;
// Required. Temperature for the opponent agent.
optional float blueprint_temperature = 16;
// Optional. If set, will override global model_path.
optional string blueprint_model_path = 20;
// Optional. If set, will stop the rollout once the exploit agent(s) is out.
optional bool fast_finish = 17;
// Optional. If provided, rollouts and evals will start from these games.
// The file is expected to contain paths to game.jsons, one per line.
optional string initial_games_index_file = 18;
// Optional. If set, will use only this number of cores. The last cores are
// reserved, assuming the machine has 80 cores.
optional int32 num_cores_to_reserve = 19;
}
optional Rollout rollout = 5;
message SearchRollout {
// Required. Params of the CFR agent. To simplify includes, an arbitrary
// agent is allowed here, but the code will only accept a CFR1P agent.
optional Agent agent = 1;
// // Required. Max number of steps to do in the rollout.
// optional int32 rollout_max_length = 2;
// Optional. How many rollout processes to run per GPU. If zero or
// negative, will run a single rollout process.
optional int32 num_workers_per_gpu = 4 [ default = 1 ];
// Required. How many consecutive phases are batched together. This affects
// the batch (num phases in batch = chunk_length * batch_size) and how
// frequently workers send updates to the trainer (once chunk_length phases
// are collected).
optional int32 chunk_length = 6;
// Required. The size of the produced batches. This is multiplied by
// chunk_length, see above.
optional int32 batch_size = 7;
// Optional. If set, the batches will concatenate rollouts until batch_size
// is reached, instead of following it exactly.
optional bool do_not_split_rollouts = 9;
optional int32 server_procs_per_gpu = 12 [ default = 1 ];
// Required in selfplay. Number of rollout processes doing eval rollouts
// against the supervised model. These rollouts are ignored for training.
// These workers are subtracted from num_rollout_processes.
// optional int32 num_eval_rollout_processes = 15;
// Optional. If provided, rollouts and evals will start from these games.
// The file is expected to contain paths to game.jsons, one per line.
optional string initial_games_index_file = 18;
// Optional. If set, will use only this number of cores. The last cores are
// reserved, assuming the machine has 80 cores.
optional int32 num_cores_to_reserve = 19;
// Optional. Verbosity of logging in rollout processes.
// 0 - no logging
// 1 - process 0 writes INFO and everyone else writes WARNINGS
// 2 - everyone writes INFO
optional int32 verbosity = 20 [ default = 1 ];
message ExtraRolloutParams {
// Optional. Whether to share the same CFR strategy for everyone.
optional bool single_cfr = 1 [ default = false ];
// Optional. If positive, will sample from all plausible actions instead of
// doing CFR, with this probability.
optional float explore_eps = 2;
// Optional. Custom explore constants for first 2 phases in the game.
optional float explore_s1901m_eps = 14;
optional float explore_f1901m_eps = 15;
// Optional. Put each rollout into the queue this many times. For debugging.
optional int32 fake_gen = 3 [ default = 1 ];
// Optional. If set, will decide whether to explore for each agent
// independently.
optional bool independent_explore = 4;
// Optional. If set, will choose the explore step first and then an agent
// that does not deviate at this step; all other agents deviate.
optional bool explore_all_but_one = 9;
// Optional. If set, will not play after this year.
optional int32 max_year = 5;
// Optional. If set, will set max_year on rollout workers randomly from
// [1902, max_year].
optional bool randomize_max_year = 6;
// Optional. If set, will reduce the number of plausible orders in the agent
// until this epoch.
optional int32 grow_action_space_till = 7;
// Optional. Simplex-discounting factor.
optional float discounting = 8 [ default = 1.0 ];
// Optional. If set, will use CFR-based EVs of the next state as targets.
optional bool use_ev_targets = 19;
// Optional. If set, will collect CFR policies and send to the trainer.
// This flag should be set programmatically by other flags.
optional bool collect_search_policies = 10;
// Optional. If set, will collect CFR policies and send to the trainer.
// This flag should be set programmatically by other flags.
optional bool collect_search_evs = 18;
// Optional. If set, will use the model under training as a policy, i.e.,
// to generate plausible actions.
optional bool use_trained_policy = 11;
// Optional. If set, will use the model under training for value
// estimation.
optional bool use_trained_value = 21 [ default = true ];
// UNUSED
optional bool fork_explore = 12;
// Optional. If set, use completely random policy (uniform sampling) for
// plausible orders.
optional bool random_policy = 13;
// Optional. Run DoubleOracle in this fraction of games. Only valid if
// `do` is provided.
optional float run_do_prob = 17 [ default = 1.0 ];
oneof maybe_do { DoubleOracleExploration do = 16; }
}
// Will be passed to `yield_rollouts`.
optional ExtraRolloutParams extra_params = 21;
// If set, no training is done, only data generation is benchmarked.
optional bool benchmark_only = 22;
message Buffer {
// Required. Buffer size in chunks.
optional int32 capacity = 1;
optional int32 prefetch = 4;
// Optional. If set, will pre-load the buffer from this path on startup.
optional string load_path = 2;
// Optional. If set, will save the buffer after this many examples are
// added. Ignored if load_path is used.
optional int32 save_at = 3;
// Optional. If set, will shuffle elements within chunks.
optional bool shuffle = 5;
}
// Optional. If set, will throttle training if the ratio of trained samples
// to generated samples is above this value.
optional float enforce_train_gen_ratio = 25 [ default = -1 ];
optional Buffer buffer = 23;
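// A hedged sketch of a Buffer block (values hypothetical):
//
//   buffer {
//     capacity: 1000
//     prefetch: 2
//     shuffle: true
//   }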
message TestSituationEval { optional bool do_eval = 1; }
optional TestSituationEval test_situation_eval = 24;
message H2HEval {
optional Agent agent_six = 1;
optional string tag = 2;
// Optional. If provided, redefines global initial_games_index_file.
optional string initial_games_index_file = 3;
// These flags will be set automatically.
// use_trained_{policy,value} is set to true iff we apply loss on
// {policy,value}. The trainer knows which losses are used and will set
// these variables correspondingly.
optional bool use_trained_policy = 4;
optional bool use_trained_value = 5;
}
// Optional. Run h2h eval against 6 other agents.
optional H2HEval h2h_eval_0 = 26;
optional H2HEval h2h_eval_1 = 29;
// Optional. If positive, automatically draw if no center ownership changes
// in this number of years.
optional int32 draw_on_stalemate_years = 27 [ default = -1 ];
optional int32 warmup_batches = 28 [ default = 1 ];
}
// Rollout params for ReSearch mode.
optional SearchRollout search_rollout = 11;
message Trainer {
// Optional. By default = infinite.
optional int32 max_epochs = 1;
// Required. Number of updates per epoch.
optional int32 epoch_size = 2;
// Optional. Save checkpoint every so many epochs.
optional int32 save_checkpoint_every = 3;
// Optional. Communicate current ckpt to inference workers every so many
// iterations.
optional int32 save_sync_checkpoint_every = 4 [ default = 1 ];
// Optional. Debugging option. Stop updating model after this number of
// updates.
optional int32 max_updates = 5 [ default = 0 ];
// Optional. If set, will train the model while it is in eval mode.
optional bool train_as_eval = 6;
optional bool train_encoder_as_eval = 7;
optional bool train_decoder_as_eval = 8;
// Run everything in eval mode except for batch norm modules. Essentially,
// it puts only dropout into eval mode.
optional bool train_as_eval_but_batchnorm = 9;
}
// Arbitrary comment. Can be used to make a "re" run for the same config
// with changed code.
optional string comment = 999;
optional Launcher launcher = 1000;
}
message BuildDbCacheTask {
// Dataset Params
optional NoPressDatasetParams dataset_params = 1;
// Required. Glob pattern to game.json files
optional string glob = 2;
// Required. Path to save db cache
optional string out_path = 3;
// Optional. If specified, use this agent's orders instead of the orders in
// the game.json
optional Agent cf_agent = 4;
// Optional, only valid with cf_agent. If > 1, sample cf_agent multiple
// times, saving each as a separate row in the db.
optional uint32 n_cf_agent_samples = 5 [ default = 1 ];
// Percentage of games to use as val set.
optional float val_set_pct = 6 [ default = 0.01 ];
optional Launcher launcher = 100;
}
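// A hedged sketch of a build_db_cache task (all paths are placeholders):
//
//   build_db_cache {
//     dataset_params { metadata_path: "/path/to/metadata.json" }
//     glob: "/path/to/jsons/game_*.json"
//     out_path: "/path/to/cache.db"
//     val_set_pct: 0.01
//   }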
// The root config. Every top-level prototxt must be a message of this type.
// User's code will receive a specific task config after all includes and
// redefines are resolved.
message MetaCfg {
repeated Include includes = 1;
oneof task {
CompareAgentsTask compare_agents = 101;
TrainTask train = 102;
LaunchBotTask launch_bot = 103;
ExploitTask exploit = 104;
BuildDbCacheTask build_db_cache = 105;
// Dummy task to test heyhi.
TestTask test = 999;
}
}
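// A hedged end-to-end sketch of a root prototxt (the Include body is omitted
// since Include is defined in conf/common.proto; values are hypothetical):
//
//   includes { ... }
//   compare_agents {
//     agent_one { ... }
//     agent_six { ... }
//     power_one: AUSTRIA
//     seed: 0
//   }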