-
Notifications
You must be signed in to change notification settings - Fork 3
/
params.yaml
183 lines (172 loc) · 5.7 KB
/
params.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# Default file and directory locations. They are defined once here so that
# the pipeline file `dvc.yaml` can refer to them.
general:
  data: data/cleaned.csv
  samples: models/samples.hdf5
  risks: models/risks.hdf5
  prevalences: models/prevalences.hdf5
  metrics: metrics.json
  plots_dir: plots
########################################################################################
# This dictionary defines which LNLs (lymph node levels) are present in the
# graph and how they are connected.
# NOTE(review): each key presumably lists the nodes it has an outgoing edge
# to, so an empty list would mean "no outgoing edges" -- confirm against the
# `lymph-model` docs.
graph:
  tumor:
    primary: [I, II, III, IV, V, VII]
  lnl:
    I: []
    II: []
    III: [IV]
    IV: []
    V: []
    VII: []
########################################################################################
# Below are some parameters necessary for the model setup:
model:
  first_binom_prob: 0.3  # defines shape of time prior of first T-stage
  max_t: 10  # max number of time steps to evolve system
  t_stages: [early, late]  # T-stages to consider in the data
  class: Unilateral  # model class to use (see `lymph-model` docs)
  # keyword arguments to pass to the model constructor
  kwargs:
    base_symmetric: false
    trans_symmetric: true
    use_mixing: true
########################################################################################
# Choose how to create the synthetic dataset. The currently set values
# reflect what one can see in the data (e.g. on https://lyprox.org)
synthetic:
  t_stages_dist:
    early: 0.6  # 60% of synthetic patients are early T-stage...
    late: 0.4  # ...and 40% late T-stage
  midline_ext_prob: 0.3  # create 30% of patients with midline extension
  # Use these made-up specificity & sensitivity values
  # to create the fake observations
  modalities:
    synth_CT: [0.85, 0.85]
########################################################################################
# Under this key we define the specificity and sensitivity for a range
# of diagnostic modalities. They are taken from the literature, and during
# sampling, everything that is not commented out here will be used for
# inference.
# NOTE(review): each pair is presumably [specificity, sensitivity], in that
# order -- confirm against the consumer of this file.
modalities:
  CT: [0.76, 0.81]
  MRI: [0.63, 0.81]
  PET: [0.86, 0.79]
  FNA: [0.98, 0.80]
  diagnostic_consensus: [0.86, 0.81]
  pathology: [1.0, 1.0]
  pCT: [0.86, 0.81]
  max_llh: [1.0, 1.0]
# These of the above-defined modalities will be used to compute things
# like the naive maximum likelihood estimate of the true hidden
# state `max_llh`, or the `rank` "modality".
# The weird nested way of writing them down here is so that DVC can unpack
# the dict directly into the command.
data_cleaning_modalities:
  modalities:
    - CT
    - MRI
    - PET
    - FNA
    - diagnostic_consensus
    - pathology
    - pCT
# This lists the above-defined modalities that will be used for
# inference and evaluation.
# The weird nested way of writing them down here is so that DVC can unpack
# the dict directly into the command.
inference_modalities:
  modalities:
    - max_llh
########################################################################################
# This defines the sampler settings and the thermodynamic integration (TI)
# path.
sampling:
  walkers_per_dim: 20  # num of parallel walkers per parameter space dimension
  burnin: 1000  # burn-in steps to discard
  nsteps: 20  # do this many serious steps per sampling round
  thin_by: 10  # draw this many samples for one step in `nsteps`
  # Use this as the inverse temp schedule for TI: 64 values rising from 0.0
  # to 1.0.
  # NOTE(review): the values appear to follow (i / 63)**5 for i = 0..63 --
  # confirm before regenerating the list.
  temp_schedule:
    - 0.0
    - 1.0076210988534706e-09
    - 3.224387516331106e-08
    - 2.448519270213934e-07
    - 1.031804005225954e-06
    - 3.148815933917096e-06
    - 7.835261664684588e-06
    - 1.6935087808430282e-05
    - 3.3017728167230525e-05
    - 5.949901826619859e-05
    - 0.00010076210988534707
    - 0.0001622783855914503
    - 0.0002507283732699068
    - 0.0003741226606566017
    - 0.000541922809869769
    - 0.0007651622719418543
    - 0.0010565673013513768
    - 0.0014306778705547923
    - 0.001903968584518355
    - 0.00249496959524998
    - 0.0032243875163311063
    - 0.004115226337448558
    - 0.00519290833892641
    - 0.006485395006257844
    - 0.008023307944637018
    - 0.009840049793490924
    - 0.011971925141011254
    - 0.014458261438686257
    - 0.01734152991583261
    - 0.020667466494127266
    - 0.02448519270213934
    - 0.02884733658986194
    - 0.03381015364324406
    - 0.0394336476987224
    - 0.045781691857753354
    - 0.05292214940134466
    - 0.06092699470458736
    - 0.06987243415118778
    - 0.07983902704799936
    - 0.09091180653955425
    - 0.1031804005225954
    - 0.11673915256060852
    - 0.13168724279835387
    - 0.14812880887639787
    - 0.16617306684564512
    - 0.18593443208187052
    - 0.207532640200251
    - 0.231092867969897
    - 0.2567458542283846
    - 0.28462802079628763
    - 0.31488159339170957
    - 0.34765472254481466
    - 0.38310160451236014
    - 0.4213826021922288
    - 0.46266436603796024
    - 0.5071199549732823
    - 0.5549289573066435
    - 0.6062776116457449
    - 0.6613589278120725
    - 0.7203728077554274
    - 0.7835261664684589
    - 0.8510330529011965
    - 0.923114770875582
    - 1.0
  # only used when sampling until convergence
  kwargs:
    max_steps: 20000
    check_interval: 100
    trust_threshold: 30.0
    rel_acor_threshold: 0.075
########################################################################################
# RISKS:
# Every entry in this array defines a scenario for which the stage
# `predict-risk` will compute the risks, given a set of samples.
# The array is currently empty, i.e. no risk scenarios are defined.
risks: []
########################################################################################
# PREVALENCES:
# The structure of these scenarios is very similar to that of the risks.
# But here, one cannot provide a diagnosis. This only computes the
# prevalence of a certain pattern of involvement for a defined
# diagnostic modality. It can, however, be compared to the prevalence
# in the data.
# The array is currently empty, i.e. no prevalence scenarios are defined.
prevalences: []