# benchmk_50eaf423.hpar.yaml
Defaults:
  # this config works for flux-data
  # note, CFS-based location will run slower vs. SCRATCH
  cfs_out: &CFS_OUT /global/homes/b/balewski/prje/tmp_NyxHydro_outFlux4
  cfs_data: &CFS_DATA /global/homes/b/balewski/prje/superRes-Nyx2022a-flux
  summit_data: &AST153_PROJ /gpfs/alpine/ast153/proj-shared/balewski/superRes-packed_h5XX/
  summit_out: &AST153_WORLD /gpfs/alpine/world-shared/ast153/balewski/tmp_NyxHydro4kDXXX/
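  # The &NAME tokens above are YAML anchors; facility_conf below references
  # them as *NAME aliases, so each path is defined only once.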
  checkpoint_interval/epochs: 5
  comment: production config for Super-Resolution using Flux
  const_local_batch: true
  # optional, reduces training samples
  #max_glob_samples_per_epoch: 256 # do 1400 or 256=BS32
  myId: 50eaf423
  num_cpu_workers: 8
  num_inp_chan: 1
  opt_pytorch: {autotune: true, detect_anomaly: false, zerograd: true}
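  # Assumption (the training code is not part of this file): these flags
  # presumably toggle torch.backends.cudnn.benchmark (autotune),
  # torch.autograd.set_detect_anomaly() (detect_anomaly), and
  # optimizer.zero_grad(set_to_none=True) (zerograd).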
  pred_dump_interval/epochs: 100
  text_log_interval/epochs: 1
  image_flip_rot: false
facility_conf:
  perlmutter:
    D_LR: {decay/epochs: auto, init: 2.39e-05, reduce: 0.0562, warmup/epochs: 20}
    G_LR: {decay/epochs: auto, init: 9.99e-05, reduce: 0.0288, warmup/epochs: 20}
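    # Presumed learning-rate schedule, judging by the key names: ramp up over
    # `warmup/epochs`, then decay by the factor `reduce`; `decay/epochs: auto`
    # likely derives the decay span from the total epoch count.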
    batch_size: 4
    base_path: *CFS_OUT
    #data_path: /pscratch/sd/b/balewski/superRes-packed_h5XXX/
    data_path: *CFS_DATA
  summit:
    base_path: *AST153_WORLD
    data_path: *AST153_PROJ
    batch_size: 4
    D_LR: missing
  crusher:
    base_path: *AST153_WORLD
    data_path: *AST153_PROJ
    D_LR: {decay/epochs: auto, init: 2.39e-05, reduce: 0.0562, warmup/epochs: 20}
    G_LR: {decay/epochs: auto, init: 9.99e-05, reduce: 0.0288, warmup/epochs: 20}
    batch_size: 4
data_conf:
  LR_zFin: flux_LR_z3
  HR_zIni: invBarDens_HR_z200
  HR_zFin: flux_HR_z3
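  # Assumption, based only on the field names: LR/HR denote low/high
  # resolution cubes and the z suffix the redshift, i.e. the low-res flux at
  # z=3 (plus the high-res initial inverse baryon density at z=200) is mapped
  # to the target high-res flux at z=3.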
model_conf:
  D: #........... Discriminator
    conv_block:
      bn: [1, 0, 0, 0, 1, 0, 1]
      filter: [32, 64, 128, 128, 256, 512, 512]
      kernel: [2, 2, 2, 2, 2, 2, 2]
      stride: [2, 2, 2, 2, 2, 1, 1]
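      # The four lists are parallel, one entry per conv layer (7 layers):
      # layer i uses filter[i] output channels, kernel size kernel[i], stride
      # stride[i], and batch-norm when bn[i] is 1; presumably 3D convolutions,
      # since the inputs are simulation cubes.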
    fc_block:
      dims: [64, 64, 32]
      dropFrac: 0.119
    summary_verbosity: 0 # 0=off, 1=layers, 2=torchsummary, 3=both
  G: # ....... Generator
    first_cnn_chan: 32
    num_resid_conv: 13
    num_upsamp_bits: 2
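    # Presumably each "bit" is one 2x upsampling stage, so 2 bits give a
    # 2^2 = 4x super-resolution factor.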
    conv_block3: # CNN params, last chan must be 1
      filter: [60, 30, 10, 1]
      kernel: [ 5, 5, 5, 5]
    summary_verbosity: 0 # 0=off, 1=layers, 2=torchsummary, 3=both
comment: hpoA perlmutter 20220212_111627_PST
tb_model_graph: null
qa_hpo_1gpu: {D_param_count: 13648577, G_param_count: 329778, adv_samples_per_sec: 41.54471043913954,
  facility: perlmutter}
train_conf:
  PG_LR: {init: 0.0001}
  pre_epochs: 30 # was 20
  adv_epochs: 10
  perc_warmup/epochs: 50
  early_stop_discr: {G_thres: 0.03, ring_size/epochs: 100}
  advers_weight: 0.2
  content_weight: 0.8
  fft_weight: 0.3
  msum_weight: 0.06
  pixel_weight: 8.0
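  # Assumption about how the weights combine (training code not shown here):
  # the generator loss is presumably the weighted sum
  #   L_G = advers_weight*L_adv + content_weight*L_content + fft_weight*L_fft
  #       + msum_weight*L_msum + pixel_weight*L_pixel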
  loss_norm_h5: cube_82337823.normFlux.h5
  resume: false
  resume_d_weight: ''
  resume_g_weight: ''
  resume_p_weight: ''
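  # With resume: false the empty *_weight paths are ignored and training
  # starts from scratch; to resume, they presumably point at saved
  # discriminator/generator/pre-training checkpoints (inferred from the names).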
  start_epoch: 0
  start_pre_epoch: 0
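# Usage sketch (an assumption, not part of this repo's documented workflow):
# PyYAML resolves the &/* anchors on load, e.g.
#   import yaml
#   with open('benchmk_50eaf423.hpar.yaml') as f:
#       conf = yaml.safe_load(f)
#   print(conf['facility_conf']['perlmutter']['data_path'])
#   # -> /global/homes/b/balewski/prje/superRes-Nyx2022a-flux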