import std / [os, sequtils, tables, stats, strformat, strutils, options]
import ingrid / [tos_helpers, ingrid_types]
import pkg / [nimhdf5, datamancer, ggplotnim, seqmath]
import unchained
## IMPORTANT NOTICE:
## The code here (and in related files touching on the diffusion constant)
## uses the variable `σT` to talk about the diffusion constant `D_T`:
## `D_T = σ_T / √( drift distance )`
## or written as the definition of `σ_T`:
## `σ_T = D_T · √( drift distance )`
##
## This is of course *INACCURATE*, but kept for "historical" reasons. I started with
## an implementation that directly computed σ_T, but then switched to extracting
## the D_T value from it. We could update the code, but that would break the existing
## data files, which store D_T data under the name "σ_T".
##
## The naming stuck because, as part of the fake data simulation in
## `simulateRmsTrans`, we *first* sample the distance over which a cluster drifts
## and then draw from a normal distribution with `σ = D_T * √x`:
##
## let x = rnd.gauss(mu = 0.0, sigma = σT * sqrt(zDrift))
## let y = rnd.gauss(mu = 0.0, sigma = σT * sqrt(zDrift))
##
## where the `σT` used there should clearly read `D_T`. Just be aware of it!
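##
## As a quick numerical sanity check of the relation (values purely illustrative,
## using the starting diffusion of the optimization below and the full drift of
## `MaxZ` = 3 cm):
when false:
  let D_T = 660.0    # μm / √cm, starting value of the optimization below
  let zDrift = 3.0   # cm, full drift distance (`MaxZ`)
  echo "σ_T = ", D_T * sqrt(zDrift), " μm" # ≈ 1143 μm transverse spread after 3 cm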
## XXX: a bit annoying that this is here...
const CdlFile = "/home/basti/CastData/data/CDL_2019/calibration-cdl-2018.h5"
const RmsCleaningCut = 1.5
proc linear(p: Tensor[float], x: float): float =
result = p[0] * x + p[1]
import pkg / numericalnim except linspace
proc resultStr[T: Tensor | seq](pRes, pErr: T, χ²dof: float): string =
result.add &"χ²/dof = {χ²dof:.4f}\n"
for i in 0 ..< pRes.len.int:
result.add &"p[{i}] = {pRes[i]:.4e} ± {pErr[i]:.4e}\n"
  result = result.strip()
proc echoResult(pRes, pErr: Tensor[float], χ²dof: float) =
echo resultStr(pRes, pErr, χ²dof)
proc high[T](x: Tensor[T]): int = x.size.int - 1
const CutVal = "CutVal" # the actual cut value required to get `ε`!
const Err = 0.025
proc gauss[T: Tensor[float] | seq[float]](p: T, x: float): float =
let N = p[0]
let mean = p[1]
let sigma = p[2]
let
arg = (x - mean).float / sigma.float
res = exp(-0.5 * arg * arg)
result = N / sqrt(2*PI) * res
proc gaussSeq(p: seq[float], x: float): float =
result = gauss(p.toTensor, x)
proc toFitDf[T: Tensor | seq](params: T, xmin, xmax: float): DataFrame =
let xFit = seqmath.linspace(xmin, xmax, 1000)
let yFit = xFit.mapIt(gauss(params, it))
result = toDf({ "xFit" : xFit,
"yFit" : yFit })
import mpfit
proc fitRmsTransverse(data: Tensor[float], bins: int): (DataFrame, seq[float], string) =
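  ## Fits a Gaussian to the histogram of the `rmsTransverse` data (from just left of
  ## the histogram peak up to the last bin) and returns a DataFrame of the fitted
  ## curve, the fit parameters and a result string with χ²/dof and parameter errors.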
var (hist, bins) = histogram(data.toSeq1D, bins = bins)
let binWidth = bins[1] - bins[0]
bins = bins[0 ..< ^1].mapIt(it + binWidth / 2.0) # remove last bin edge, move half bin right to have centers
let hArgMax = hist.argmax
let binArgMax = bins[hist.argmax]
let histMax = hist[hArgMax].float
let binLowerIdx = max(0, (hArgMax - (0.03 * bins.len.float)).round.int)
let binUpperIdx = bins.high #(hArgMax + (0.055 * bins.len.float)).round.int
let binUpper = bins[^1] # bins.lowerBound(data.percentile(98))]
let params = @[histMax, binArgMax, abs(binArgMax - binUpper)]
let xT = bins[binLowerIdx .. binUpperIdx]
let yT = hist[binLowerIdx .. binUpperIdx].mapIt(it.float)
let eT = yT.mapIt(1.0) #sqrt(x))
let (pRes, res) = fit(gaussSeq, params, xT, yT, eT)
let xFit = linspace(binArgMax, binUpper, 200)
let yFit = xFit.mapIt(gauss(pRes, it))
let resStr = resultStr(pRes, res.error, res.chiSq)
echo resStr
result = (toFitDf(pRes, xT.min, xT.max), pRes, resStr)
proc determineFromData(x: seq[float]): float =
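  ## Determines a cut-off value from the data: the center of the first histogram
  ## bin to the right of the peak whose content drops to 10 % of the maximum or below.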
var (hist, bins) = histogram(x, bins = 100)
let binWidth = bins[1] - bins[0]
bins = bins[0 ..< ^1].mapIt(it + binWidth / 2.0) # remove last bin edge, move half bin right to have centers
let hArgMax = hist.argmax
let binArgMax = bins[hist.argmax]
let histMax = hist[hArgMax].float
var idx = hArgMax
while hist[idx] > histMax * 0.1:
inc idx
result = bins[idx]
import ingrid / gas_physics
import helpers / sampling_helper
import xrayAttenuation
import std / random
proc toEdf(x: seq[float], bins: seq[float]): seq[float] =
## Computes the EDF of the input data `x` given some `bins`.
##
## The bins are used as boundaries to count elements below each bin edge. The
## result contains `bins.len` elements, all in [0, 1]
let xS = x.sorted
var i = 0
result = newSeq[float](bins.len)
for j, b in bins:
while i < xS.high and xS[i] <= b:
inc i
result[j] = i.float / x.len.float
#for x in mitems(result):
# x = x / result[^1]
doAssert result.allIt(it <= 1.0)
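## A tiny illustration of `toEdf` (made-up numbers), giving the fraction of samples
## at or below each bin edge:
when false:
  echo toEdf(@[1.0, 2.0, 3.0, 4.0], @[0.0, 2.5, 5.0])
  # -> @[0.0, 0.5, 0.75]; the last sample is never counted due to the `i < xS.high`
  # bound, which affects both EDFs in the KS test below equally.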
proc kolmogorovSmirnov(x1, x2: seq[float]): float =
## Compute the Kolmogorov-Smirnov test for two datasets.
##
## The data is binned first to min & max of the combined data range and based on the
## associated EDF the KS test is performed.
##
## ``KS(x) = sup_x | EDF₁(x) - EDF₂(x) |``
##
  ## or, in (pseudo) code:
##
## ``KS(x) = max(abs(EDF₁ -. EDF₂(x)))``
let range = (min: min(x1.min, x2.min), max: max(x1.max, x2.max))
let bins = linspace(range[0], range[1], 200)
let x1Edf = x1.toEdf(bins)
let x2Edf = x2.toEdf(bins)
result = 0.0
for i, b in bins:
result = max( result, abs(x1Edf[i] - x2Edf[i]) )
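## Usage sketch (illustrative samples): identical inputs yield a distance of 0,
## well separated ones a large distance:
when false:
  doAssert kolmogorovSmirnov(@[1.0, 2.0, 3.0], @[1.0, 2.0, 3.0]) == 0.0
  echo kolmogorovSmirnov(@[1.0, 2.0, 3.0], @[10.0, 11.0, 12.0]) # ≈ 0.67 here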
proc rankData[T](x: seq[T]): seq[int] =
  ## Returns a sequence of `x.len` elements that contains the ranks of the values in
  ## the input data (0-based; ties keep their order of appearance in `x`).
  result = newSeq[int](x.len)
  let sortedIdx = toSeq(0 ..< x.len).sortedByIt(x[it])
  for rank, idx in sortedIdx:
    result[idx] = rank
proc cramerVonMises(xIn, yIn: seq[float]): float =
## Computes the Cramér-von Mises criterion for two samples x and y. The
## two samples do not need to be of the same size.
##
## NOTE: This is a Bing Chat implementation. It directly computes the
## T value by building the empirical cumulative distribution functions.
## (Yes, really. I was lazy :D)
let x = xIn.sorted
let y = yIn.sorted
let m = x.len # size of first sample
let n = y.len # size of second sample
let N = m + n # size of combined sample
let dHN = 1.0 / N # change in combined ECDF
var i = 0 # index for x
var j = 0 # index for y
var Fm = 0.0 # empirical CDF of x
var Gn = 0.0 # empirical CDF of y
result = 0.0 # Cramér-von Mises criterion
while i < m and j < n: # loop until one sample is exhausted
if x[i] <= y[j]: # next value is from x
Fm += 1.0 / m # update Fm
inc i # increment i
else: # next value is from y
Gn += 1.0 / n # update Gn
inc j # increment j
result += (Fm - Gn) ^ 2 * dHN # update result using change in HN
for k in i ..< m: # loop until x is exhausted
Fm += 1.0 / m # update Fm
result += (Fm - Gn) ^ 2 * dHN # update result using change in HN
for k in j ..< n: # loop until y is exhausted
Gn += 1.0 / n # update Gn
result += (Fm - Gn) ^ 2 * dHN # update result using change in HN
result *= m * n / N # scale result by sample sizes and combined size
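## Usage sketch (illustrative samples), mirroring its use as the optimization loss in
## `simulateRmsTrans` below: the criterion grows as the two distributions separate:
when false:
  echo cramerVonMises(@[1.0, 2.0, 3.0], @[1.0, 2.0, 3.0]) # ≈ 0.08 (similar samples)
  echo cramerVonMises(@[1.0, 2.0, 3.0], @[4.0, 5.0, 6.0]) # ≈ 0.53 (separated samples)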
import arraymancer
var HistoPath = "/home/basti/Sync/"
let HistoFile = "test_histo.pdf"
proc simulateRmsTrans(rnd: var Rand, rmsReal: seq[float], gasMixture: GasMixture, σT: float, energy: keV,
isBackground: bool, run: int): float =
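  ## Simulates `rmsTransverse` distributions for the given diffusion constant `σT`
  ## (really `D_T`, see the notice at the top of this file) and returns the
  ## Cramér-von Mises distance between the simulated distribution and the real
  ## `rmsReal` data. A comparison histogram is saved to `HistoPath / HistoFile`.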
## 2. get absorption length for this energy
#rnd = initRand(42) # same RND for every run!
let λ = if not isBackground: absorptionLengthCAST(gasMixture, energy)
else: 0.cm
## 3. generate a sampler for the conversion point using λ
proc expFn(x: float, λ: float): float =
if not isBackground:
result = 1.0 / λ * exp(- x / λ)
else:
      result = 1.0 # uniform for background: mostly muons parallel to the readout, at any distance from the cathode
let fnSample = (proc(x: float): float =
result = expFn(x, λ.float)
)
let distSampler = sampler(fnSample, 0.0, 3.0, num = 1000)
var energySampler = if isBackground:
sampler(
(proc(x: float): float =
let λ = -10.0 / (ln(0.4))
result = exp(-x / λ)),
0.1, 10.0, num = 1000
)
else:
Sampler() # not needed
  ## 4. now sample N events with `num` electrons each, evaluate `rmsTransverse` for each
const N = 5000
var rms = newSeq[float](N)
for i in 0 ..< N:
## 5. define number of electrons expected for this energy
## (or also receive `hits` dataset? Problematic due to background requirement!
## instead use heuristic?)
let num = if isBackground: rnd.sample(energySampler) / 0.026
else: energy / 26.eV
    ## 6. sample a conversion position for this event and generate a sampler for
    ##    this event's possible diffusion
let P =
block:
let conversionPoint = rnd.sample(distSampler) # point 'behind' cathode at which conversion takes place
(proc(rnd: var Rand): float =
let zDrift = 3.0 - conversionPoint # use conversion point to define a closure to sample for each electron
let x = rnd.gauss(mu = 0.0, sigma = σT * sqrt(zDrift))
let y = rnd.gauss(mu = 0.0, sigma = σT * sqrt(zDrift))
result = sqrt(x*x + y*y)
)
let energySigma = if isBackground: 0.4 # larger range of energies allowed for background (but already uniform)
else: 0.1
let numThis = max(rnd.gauss(mu = num, sigma = num * energySigma).floor.int, 15)
# 2D Tensor of the pixel positions to later compute covariance matrix
var posT = zeros[float]([numThis, 2])
for j in 0 ..< numThis:
let dist = P(rnd).μm.to(mm).float
let φ = rnd.rand(0.0 .. 2*PI)
let xp = (dist * cos(φ) + 7.0)
let yp = (dist * sin(φ) + 7.0)
posT[j, _] = [xp, yp].toTensor.unsqueeze(0)
## Compute rotated coordinate system using eigenvectors of covariance matrix
let cov = covariance_matrix(posT, posT)
let (eigenval, eigenvec) = symeig(cov, return_eigenvectors = true)
let R = eigenvec
# rotate the existing positions into the rotated coordinates using the eigenvectors
let posTrot = posT * R
# 0 corresponds to the shorter axis
let rmsT = posTrot[_, 0].std()
rms[i] = rmsT
  ## 7. now compute the loss
result = cramerVonMises(rmsReal, rms) #kolmogorovSmirnov(rmsReal, rms)
let df = bind_rows([("real", toDf({"rmsT" : rmsReal})), ("sim", toDf({"rmsT" : rms}))], "typ")
ggplot(df, aes("rmsT", fill = "typ")) +
geom_histogram(bins = 100, hdKind = hdOutline, alpha = 0.5, position = "identity", density = true) +
ggtitle(&"Run: {run} with Cramér-von Mises = {result}") +
ggsave(HistoPath / HistoFile)
proc determineMonteCarlo(rmsT: seq[float], energy: keV, isBackground = false, run = -1): (float, float) =
## It determines the best diffusion constant, `D_T`. That is:
##
## `σ_T = D_T · √( drift distance )`
##
## and not the diffusion coefficient `σ_T`!
##
  ## Determines the best diffusion matching the data using a Monte Carlo approach
  ## combined with a simple non-linear optimization strategy that checks the
  ## improvement via the Cramér-von Mises criterion (previously Kolmogorov-Smirnov).
  ## We aim to reproduce the best possible match of the rmsTransverse data using
  ## the known data and the target energy.
## 1. define gas mixture
let gm = initCASTGasMixture()
## 2. start diffusion value
var diffusion = 660.0
## 3. define dual number
let rmsReal = rmsT
var diffD = diffusion
var rnd = initRand(1212)
#var ks = rnd.simulateRmsTrans(rmsReal, gm, diffD, energy)
var ks = rnd.simulateRmsTrans(rmsReal, gm, diffD, energy, isBackground, run)
# define a function that computes the derivative of ks with respect to diffD
# this can be done either analytically or numerically
# here we use a simple finite difference approximation
proc dks_ddiffD(diffD: float): float =
let h = 1.0 # big enough step size to overcome randomness of MC process!
result = (rnd.simulateRmsTrans(rmsReal, gm, diffD + h, energy, isBackground, run) -
rnd.simulateRmsTrans(rmsReal, gm, diffD - h, energy, isBackground, run)) / (2 * h)
# define a stopping criterion
let max_iter = 100 # maximum number of iterations
let tol = 1e-6 # tolerance for change in diffD
  # iterate gradient descent until convergence or maximum iterations reached
var iter = 0 # iteration counter
var bestEstimate = diffD
var bestLoss = ks
proc lr(iter: int): float =
let a = 10.0
let c = 1.0 / 100.0
let λ = -ln(c) * c
result = a * exp(-λ * iter.float)
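  # Note: `lr` decays exponentially from `a` (= 10.0) at iter = 0 down to `a·c`
  # (= 0.1) at iter = max_iter = 100, so early iterations take aggressive steps
  # and later ones only refine the estimate.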
#var lr = 5.0
var numBad = 0 # keeps track of number of iters without improvements. Alternative: adjust gradient?
const MaxConsBad = 10 # maximum consecutive bad samples
while iter < max_iter and numBad < MaxConsBad:
let delDiffD = dks_ddiffD(diffD)
    let diffD_new = diffD - lr(iter) * delDiffD # gradient descent step with a decaying learning rate
echo "iter = ", iter, ", ∂σT = ", delDiffD, " and new diff ", diffD_new, " KS diff? ", ks, " and best: ", bestLoss
if abs(diffD_new - diffD) < tol: # check if change is small enough
echo "Stopping iteratoin due to difference = ", abs(diffD_new - diffD)
break # stop the loop
diffD = clamp(abs(diffD_new), 400.0, 800.0) # assign new value to diffD
ks = rnd.simulateRmsTrans(rmsReal, gm, diffD, energy, isBackground, run) # update ks using new diffD
iter += 1 # increment iteration counter
if ks < bestLoss:
bestLoss = ks
bestEstimate = diffD
numBad = 0
# copy the last plot to one based on this run number
copyFile(HistoPath / HistoFile, HistoPath / &"histo_sim_vs_real_run_{run}_loss_{bestLoss}_sigma_{bestEstimate}.pdf")
else:
inc numBad
if numBad > MaxConsBad div 2:
# reset estimate back to best estimate, i.e. try again with smaller 'learning rate'
echo "Resetting current estimate: ", diffD, " back to previous best: ", bestEstimate
diffD = bestEstimate
  result = (bestEstimate, bestLoss) # return the best estimate and its loss as the optimal diffusion parameter
# now apply correction from 1D to 2D?
#result = sqrt(2.0) * result
when false:
while abs(0.1 * diffD) > 1e-2:
echo "Starting with diffusion : ", diffD, " and ks result ", ks
diffusion = diffusion - ks.d * 1e19
diffD = D(diffusion, 1.0)
ks = rnd.simulateRmsTrans(rmsReal, gm, diffD, energy)
echo "which is now: ", diffD
# 1. MC simulate rms transverse based on energy & diffusion
# 2. compute KS test of real and simulated
# 3. update diffusion using autograd
## XXX: MAYBE adjust the scaling parameter based on the whole range? Or
## generally on the 'absolute' scale of sigma? using 2 sigma if sigma
## is huge is dumb!
#const Scale = 2.0
import nimhdf5 / serialize_tables
const Scale = 1.65
const MaxZ = 3.0
const CacheTabFile = "/dev/shm/cacheTab_diffusion_runs.h5"
type
  TabKey = int
  # ^-- run number
  TabVal = (float, float)
  # ^-- (diffusion value, loss of the gradient descent via Cramér-von Mises)
  CacheTabTyp = Table[TabKey, TabVal]
## Global helper table that stores the diffusion values determined from
## the RMS data of each run.
var CacheTab =
if fileExists(CacheTabFile):
tryDeserializeH5[CacheTabTyp](CacheTabFile)
else:
initTable[TabKey, TabVal]()
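## A minimal sketch (illustrative) of inspecting the cache file directly, using the
## same (de)serialization helper as above:
when false:
  let tab = tryDeserializeH5[CacheTabTyp](CacheTabFile)
  for run, val in tab:
    echo "run ", run, ": D_T = ", val[0], " μm/√cm (loss = ", val[1], ")"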
proc runAvailable(run: int): bool =
if run in CacheTab:
result = true
else:
if fileExists(CacheTabFile):
let tab = tryDeserializeH5[CacheTabTyp](CacheTabFile)
# merge `tab` and `CacheTab`
for k, v in tab:
CacheTab[k] = v # overwrite possible existing keys in table
# write merged file
CacheTab.tryToH5(CacheTabFile)
result = run in CacheTab # still not in: not available
const BackgroundDiffusionCorrection = 40.0
proc getDiffusionForRun*(run: int, isBackground: bool): float =
## Attempts to return the diffusion determined for run `run`. If the CacheTab
## does not contain the run yet, it raises an exception.
if run in CacheTab:
result = CacheTab[run][0]
else:
raise newException(ValueError, "Diffusion for run " & $run & " not determined yet. Please " &
"generate the cache table of diffusion values using `determineDiffusion`.")
if isBackground:
result = result - BackgroundDiffusionCorrection
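## Usage sketch of the accessor above (the run number is purely hypothetical and must
## already be present in the cache table):
when false:
  let σT = getDiffusionForRun(run = 241, isBackground = false)
  echo "D_T for run 241: ", σT, " μm/√cm"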
proc getDiffusion*(rmsT: seq[float],
isBackground: bool,
run = -1,
energy = 0.0.keV,
useCache = true): (float, float) =
  ## `rmsT` must contain the `rmsTransverse` data of a single run!
##
## It returns the diffusion constant, `D_T`. That is:
##
## `σ_T = D_T · √( drift distance )`
##
## and not the diffusion coefficient `σ_T`!
##
  ## XXX: The `energy` argument here is a bit dangerous. We shouldn't pass the
  ## target energy we want to simulate later for an NN cut value, because the
  ## energy needed here is the one that best reproduces the input data, as it is
  ## only used to determine its diffusion!
var (df, pRes, _) = fitRmsTransverse(rmsT.toTensor, 100)
let scaleShift = Scale * abs(pRes[2])
#let scaleShift = 0.25
var arg = pRes[1] + scaleShift # 0.15 #scaleShift
#echo "Diffusion to use from pRes + 0.15 : ", arg / sqrt(MaxZ) * 1000.0
#echo "Diffusion to use from pRes + scaleShift : ", arg / sqrt(MaxZ) * 1000.0
#arg = determineFromData(rmsT)
if useCache and run > 0 and runAvailable(run):
result = CacheTab[run] # potentially re-read, but now available
else:
# try to reload first, maybe available then
result = determineMonteCarlo(rmsT, energy, isBackground = isBackground, run = run)
if useCache and run > 0:
CacheTab[run] = result
# serialize file
CacheTab.tryToH5(CacheTabFile)
#result = arg / sqrt(MaxZ) * 1000.0
if isBackground:
    ## All our background determinations are biased about ~30 higher than their closest
    ## 5.9 keV calibration run. Hence, for now, just subtract that to get the likely
    ## correct value.
    ## Run this file as a standalone on all data files and see the generated plot.
result[0] = result[0] - BackgroundDiffusionCorrection
echo "USED RESULT: ", result
if run > 0:
let dfD = toDf(rmsT)
echo "GENERATING PLOT:::::::::::\n\n\n\n"
let yMax = df["yFit", float].max
ggplot(dfD, aes("rmsT")) +
geom_histogram(bins = 100, hdKind = hdOutline) +
geom_line(data = df, aes = aes("xFit", "yFit"), color = "purple") +
geom_linerange(aes = aes(x = arg, y = 0.0, yMin = 0.0, yMax = yMax), color = "green") +
scale_y_continuous() +
ylab("rmsT") +
theme_font_scale(1.0, family = "serif") +
ggsave(&"{HistoPath}/run_{run}_rmsTransverseFit.pdf")
echo "RESULT COMES OUT TO ", result, " from fit parameters? ? ", pRes, " FOR RUN: ", run
#if true: quit()
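## Minimal usage sketch of `getDiffusion` (values purely illustrative; `rmsT` would be
## the `rmsTransverse` data of a single 5.9 keV calibration run):
when false:
  var rmsT: seq[float] # fill with the run's rmsTransverse data
  let (σT, loss) = getDiffusion(rmsT, isBackground = false, run = 241, energy = 5.9.keV)
  echo "D_T = ", σT, " μm/√cm with loss ", loss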
## XXX: THIS IS OUTDATED and dangerous! It gives the wrong numbers to use for the
## DF containing the real data read via `readValidDsets`, i.e. give the wrong inputs
## to the NN when evaluating the effective efficiency (or anything working with network
## prediction of real data!)
#proc getDiffusion*(df: DataFrame): float =
# ## DataFrame must contain `rmsTransverse` column!
# let (df, pRes, _) = fitRmsTransverse(df["rmsTransverse", float], 100)
# result = (pRes[1] + Scale * abs(pRes[2])) / sqrt(MaxZ) * 1000.0
proc determineDiffusion(df: DataFrame, outpath: string, runInput: int, useCache, useTeX: bool) =
var dfAll = newDataFrame()
for (tup, subDf) in groups(df.group_by("run")):
let run = tup[0][1].toInt
if runInput > 0 and run != runInput: continue
let runType = parseEnum[RunTypeKind](subDf["typ"].unique.item(string))
let isBackground = runType == rtBackground
let energy = if not isBackground: subDf["energy"].unique.item(float).keV
else: 0.keV
# or use cache ? At least if available :)
let (σT, loss) = getDiffusion(subDf["rms", float].toSeq1D,
isBackground = isBackground,
run = run,
energy = energy,
useCache = useCache)
dfAll.add toDf( {"run" : run, "σT" : σT, "loss" : loss, "isBackground" : isBackground })
let ylabel = if useTeX: r"$D_T$ [$\si{μm.cm^{-\frac{1}{2}}}$]" else: "D_T [μm/√cm]"
ggplot(dfAll, aes("run", "σT", color = "loss", shape = "isBackground")) +
geom_point() +
xlab("Run number") + ylab(ylabel) +
ggtitle("Determined diffusion constant of all CAST runs") +
themeLatex(fWidth = 0.9, width = 600, baseTheme = singlePlot) +
ggsave(&"{outpath}/σT_per_run.pdf", useTeX = useTeX, standalone = useTeX)
when isMainModule:
import ../nn/io_helpers
proc readData(fname: string): DataFrame =
withH5(fname, "r"):
let fileInfo = h5f.getFileInfo()
let chipNumber = fileInfo.centerChip
result = newDataFrame()
for run in fileInfo.runs:
let group = (recoBase() & $run)
# need rmsTransverse to determine diffusion
let grp = h5f[(group / "chip_" & $chipNumber).grp_str]
      # very weak cuts that should also apply well to background data; they remove
      # obvious events that are noisy and not well-defined photons or tracks!
## NOTE: These do not apply to the CDL datasets!
      # try to read the `tfKind` attribute; if it is not present, fall back to the default
      var tfKind = tfMnCr12 ## the default we assume
let runGrp = h5f[group.grp_str]
if "tfKind" in runGrp.attrs:
tfKind = parseEnum[TargetFilterKind](runGrp.attrs["tfKind", string])
let passIdx = h5f.cutOnProperties(
grp,
crSilver,
("width", 0.0, 6.0),
("rmsTransverse", 0.0, 1.3),
("hits", 20.0, Inf))
# now need `rms` to calculate diffusion
let rms = h5f[group / &"chip_{chipNumber}/rmsTransverse", passIdx, float]
let df = toDf({ "rms" : rms,
"run" : run,
"typ" : $fileInfo.runType,
"energy" : toXrayLineEnergy(tfKind)
})
result.add df
proc main(fnames: seq[string],
plotPath: string = "/home/basti/Sync",
histoPlotPath: string = "/home/basti/Sync",
run = -1,
useCache = true,
useTeX = false) =
## `plotPath` is where the D_T for all runs plot will be placed.
## `histoPlotPath` is the path for all the histograms of the distributions
## created during optimization.
# Adjust the plotting path for the histograms
HistoPath = histoPlotPath
var df = newDataFrame()
for f in fnames:
df.add readData(f)
determineDiffusion(df, plotPath, run, useCache, useTeX)
#when isMainModule:
import cligen
dispatch main