-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsmooth.py
107 lines (87 loc) · 4.16 KB
/
smooth.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import logging
import statistics
from typing import List, TypeVar, Union
import numpy as np
log = logging.getLogger(__name__)
# typeguard raises errors in tests when using List[float] instead of list
T = TypeVar("T", list, np.ndarray)
def _medians_353(zz: Union[List[float], np.ndarray], nbins: int) -> None:
"""Applies running median smoothing with window sizes 3, 5, 3 to input.
Args:
zz (Union[List[float], np.ndarray]): array to smooth
nbins (int): number of bins in array
"""
for i_median in range(3):
yy = zz.copy() # yy stays constant at each step in the loop
medianType = 3 if i_median != 1 else 5
ifirst = 1 if i_median != 1 else 2
ilast = nbins - 1 if i_median != 1 else nbins - 2
# do central elements first in (ifirst, ilast) range
for ii in range(ifirst, ilast):
zz[ii] = statistics.median(yy[ii - ifirst : ii - ifirst + medianType])
if i_median == 0: # first median 3
# first bin, proceedings use y_1 (=yy[0]), while ROOT uses zz[0]
zz[0] = statistics.median([3 * zz[1] - 2 * zz[2], yy[0], zz[1]])
# last bin, proceedings use y_n (=yy[-1]), while ROOT uses zz[-1]
zz[-1] = statistics.median([zz[-2], yy[-1], 3 * zz[-2] - 2 * zz[-3]])
if i_median == 1: # median 5
zz[1] = statistics.median(yy[0:3])
# second to last bin
zz[-2] = statistics.median(yy[-3:])
def smooth_353QH_twice(hist: T) -> T:
"""Runs the 353QH algorithm twice and returns smooth version of the input.
For documentation see these proceedings https://cds.cern.ch/record/186223/ on page
292. The algorithm runs twice to avoid over-smoothing peaks and valleys. The
algorithm is not aware of statistical uncertainties per entry in the array. See also
https://root.cern.ch/doc/master/classTH1.html#aeb935cae10dbf9cd484bee1b6a549f83 for
the ROOT implementation.
Args:
hist (Union[list, np.ndarray]): array to smooth
Returns:
Union[list, np.ndarray]: smooth version of input
"""
nbins = len(hist)
if nbins < 3:
log.warning("at least three points needed for smoothing, no smoothing applied")
return hist
if isinstance(hist, np.ndarray):
# ensure dtype is not int to avoid rounding in smooth histogram
hist = hist.astype("float")
zz = np.array(hist, dtype=float, copy=True)
for i_353QH in range(2): # run 353QH twice
# do running median with window sizes 3, 5, 3
_medians_353(zz, nbins)
yy = zz.copy()
# quadratic interpolation for flat segments
for ii in range(2, nbins - 2):
if zz[ii - 1] != zz[ii] or zz[ii] != zz[ii + 1]:
continue # not flat
left_larger = zz[ii - 2] - zz[ii] # two bins left larger by this amount
right_larger = zz[ii + 2] - zz[ii] # two bins right larger by this amount
if left_larger * right_larger <= 0:
continue # current position is part of larger scale slope
jk = 1
if abs(right_larger) > abs(left_larger):
jk = -1 # left is more flat than right
yy[ii] = -0.5 * zz[ii - 2 * jk] + zz[ii] / 0.75 + zz[ii + 2 * jk] / 6
yy[ii + jk] = 0.5 * (zz[ii + 2 * jk] - zz[ii - 2 * jk]) + zz[ii]
# running means
for ii in range(1, nbins - 1):
zz[ii] = 0.25 * yy[ii - 1] + 0.5 * yy[ii] + 0.25 * yy[ii + 1]
zz[0] = yy[0]
zz[-1] = yy[-1]
if i_353QH == 0:
# algorithm has been run once
rr = zz.copy() # save computed values
# calculate residuals: (original) - (after 353QH)
zz = np.asarray([hist[ii] - zz[ii] for ii in range(0, nbins)])
# zz is now "rough", while rr is "smooth"
# "twicing": run 353QH again on "rough" zz and add to "smooth" rr
hist_new = hist.copy() # histogram after smoothing
for ii in range(nbins):
if min(hist) < 0:
hist_new[ii] = rr[ii] + zz[ii]
# result is positive if no negative values are in input
else:
hist_new[ii] = max(rr[ii] + zz[ii], 0)
return hist_new