Commit e90cb12: PR review modifications
Aretno committed Dec 17, 2019 (1 parent: d5efe2a)
Showing 6 changed files with 164 additions and 110 deletions.
invisible_cities/database/test_data/test_psf.npz: binary file modified (stored in Git LFS; contents not shown).
16 changes: 8 additions & 8 deletions invisible_cities/evm/nh5.py

@@ -251,11 +251,11 @@ class EventPassedFilter(tb.IsDescription):


 class PSFfactors(tb.IsDescription):
-    xr     = tb.Float32Col(pos=0)
-    yr     = tb.Float32Col(pos=1)
-    zr     = tb.Float32Col(pos=2)
-    x      = tb.Float32Col(pos=3)
-    y      = tb.Float32Col(pos=4)
-    z      = tb.Float32Col(pos=5)
-    factor = tb.Float32Col(pos=6)
-    nevt   = tb. UInt32Col(pos=7)
+    nevt   = tb. UInt32Col(pos=0)
+    xr     = tb.Float32Col(pos=1)
+    yr     = tb.Float32Col(pos=2)
+    zr     = tb.Float32Col(pos=3)
+    x      = tb.Float32Col(pos=4)
+    y      = tb.Float32Col(pos=5)
+    z      = tb.Float32Col(pos=6)
+    factor = tb.Float32Col(pos=7)
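For reference, a minimal sketch of how a tb.IsDescription like the reordered PSFfactors is typically materialised and filled with plain PyTables. Only the column layout comes from this diff; the file and node names are illustrative:

    import tables as tb

    from invisible_cities.evm.nh5 import PSFfactors

    # Illustrative file and node names, not part of this commit.
    with tb.open_file("psf_example.h5", "w") as h5out:
        table = h5out.create_table("/", "PSFfactors", PSFfactors)
        row   = table.row
        row["nevt"  ] = 10                              # nevt now leads the row (pos=0)
        row["xr"], row["yr"], row["zr"] = 1.0, 2.0, 3.0
        row["x" ], row["y" ], row["z" ] = 0.0, 0.0, 0.0
        row["factor"] = 0.5                             # factor moves to the end (pos=7)
        row.append()
        table.flush()                                   # on-disk order follows pos=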
23 changes: 12 additions & 11 deletions invisible_cities/io/kdst_io_test.py

@@ -83,10 +83,11 @@ def test_xy_writer(config_tmpdir, corr_toy_data, writer):
 def test_psf_writer(config_tmpdir):
     output_file = os.path.join(config_tmpdir, "test_psf.h5")

-    xr, yr, zr = np.linspace(-50, 50, 101), np.linspace(-20, 20, 101), np.linspace(20, 30, 11)
+    xdim, ydim, zdim = 101, 101, 11
+    xr, yr, zr       = np.linspace(-50, 50, xdim), np.linspace(-20, 20, ydim), np.linspace(20, 30, zdim)
     x, y, z = 3, 4, 5
-    factors = np.linspace(0, 1, 101*101*11).reshape(101, 101, 11)
-    nevt    = np.linspace(0, 101*101*11 -1, 101*101*11).reshape(101, 101, 11)
+    factors = np.linspace(0, 1, xdim*ydim*zdim).reshape(xdim, ydim, zdim)
+    nevt    = np.arange  (0, xdim*ydim*zdim, 1).reshape(xdim, ydim, zdim)

     with tb.open_file(output_file, 'w') as h5out:
         write = psf_writer(h5out)
@@ -98,11 +99,11 @@ def test_psf_writer(config_tmpdir):

     xx, yy, zz = np.meshgrid(xr, yr, zr, indexing='ij')

-    assert_allclose(xx     .flatten()           , psf.xr    .values)
-    assert_allclose(yy     .flatten()           , psf.yr    .values)
-    assert_allclose(zz     .flatten()           , psf.zr    .values)
-    assert_allclose(factors.flatten()           , psf.factor.values)
-    assert_allclose(nevt   .flatten()           , psf.nevt  .values)
-    assert_allclose([x] * len(factors.flatten()), psf.x     .values)
-    assert_allclose([y] * len(factors.flatten()), psf.y     .values)
-    assert_allclose([z] * len(factors.flatten()), psf.z     .values)
+    assert_allclose(xx     .flatten()        , psf.xr    .values)
+    assert_allclose(yy     .flatten()        , psf.yr    .values)
+    assert_allclose(zz     .flatten()        , psf.zr    .values)
+    assert_allclose(factors.flatten()        , psf.factor.values)
+    assert_allclose(nevt   .flatten()        , psf.nevt  .values)
+    assert_allclose(np.full(factors.size, x) , psf.x     .values)
+    assert_allclose(np.full(factors.size, y) , psf.y     .values)
+    assert_allclose(np.full(factors.size, z) , psf.z     .values)
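The rewritten assertions rely on a NumPy detail worth keeping in mind: with indexing='ij', np.meshgrid preserves the (x, y, z) axis order, so the flattened grids line up element by element with a C-ordered (xdim, ydim, zdim) cube such as factors. A self-contained sketch of that correspondence:

    import numpy as np

    xr, yr, zr = np.linspace(0, 1, 3), np.linspace(0, 1, 2), np.linspace(0, 1, 2)
    xx, yy, zz = np.meshgrid(xr, yr, zr, indexing='ij')

    # 'ij' indexing gives shape (len(xr), len(yr), len(zr)), so flattening
    # the grids matches flattening any C-ordered cube of the same shape.
    factors = np.arange(xx.size).reshape(xx.shape)
    i, j, k = np.unravel_index(7, xx.shape)
    assert xx.flatten()[7] == xr[i]
    assert yy.flatten()[7] == yr[j]
    assert zz.flatten()[7] == zr[k]
    assert factors.flatten()[7] == factors[i, j, k]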
176 changes: 96 additions & 80 deletions invisible_cities/reco/psf_functions.py

@@ -4,13 +4,15 @@
 from typing import Tuple
 from typing import List

-from ..core.core_functions import shift_to_bin_centers
-
-def createPSF(pos    : Tuple[np.ndarray, ...],
-              charge : np.ndarray,
-              nbins  : int,
-              ranges : List[List[float]]
-              ) -> Tuple[np.ndarray, np.ndarray, List[np.ndarray]] :
+from .. core.core_functions import shift_to_bin_centers
+from .. core.core_functions import in_range
+from .. database            import load_db
+
+def create_psf(pos    : Tuple[np.ndarray, ...],
+               charge : np.ndarray,
+               nbins  : int,
+               ranges : List[List[float]]
+               ) -> Tuple[np.ndarray, np.ndarray, List[np.ndarray]] :
     """
     Computes the point-spread (PSF) function of a given dataset.

@@ -31,14 +33,91 @@ def createPSF(pos    : Tuple[np.ndarray, ...],
     entries, edges = np.histogramdd(pos, nbins, range=ranges, normed=False)
     sumC   , edges = np.histogramdd(pos, nbins, range=ranges, normed=False, weights=charge)
     with np.errstate(divide='ignore', invalid='ignore'):
-        psf = np.nan_to_num(sumC/entries)
-
-    return psf, entries, [shift_to_bin_centers(edge) for edge in edges]
+        psf = np.nan_to_num(sumC / entries)
+
+    centers = [shift_to_bin_centers(edge) for edge in edges]
+
+    return psf, entries, centers
+
+
+def add_variable_weighted_mean(df        : pd.DataFrame,
+                               varMean   : str,
+                               varWeight : str
+                               ) -> pd.DataFrame :
+    """
+    Adds the average of a variable weighted by another to a
+    grouped hits DST 'df' (grouped using groupby, by event id).
+    Parameters
+    ----------
+    df        : dataframe (grouped by event and npeak, to process peak by peak)
+    varMean   : variable to be averaged.
+    varWeight : variable to be used as the weight.
+    """
+    mean, weight = df.loc[:, (varMean, varWeight)].values.T
+    df[varMean + 'peak'] = np.average(mean, weights=weight)
+
+
+def add_empty_sensors_and_normalize_q(df          : pd.DataFrame,
+                                      var         : List[str],
+                                      ranges      : List[List[float]],
+                                      sampleWidth : List[float],
+                                      database    : pd.DataFrame
+                                      ) -> pd.DataFrame :
+    """
+    Adds empty sensors to the hDST.
+    Parameters
+    ----------
+    df  : dataframe (grouped by event and npeak, to process peak by peak)
+    var : dimensions to be considered.
+    Returns
+    ----------
+    df  : dst with empty sipm hits.
+    """
+    delta_x = np.diff(ranges[0])[0] / 2
+    delta_y = np.diff(ranges[1])[0] / 2
+
+    sel_x = in_range(database.X, df.Xpeak.unique()[0] - delta_x, df.Xpeak.unique()[0] + delta_x)
+    sel_y = in_range(database.Y, df.Ypeak.unique()[0] - delta_y, df.Ypeak.unique()[0] + delta_y)
+
+    sensors = database[sel_x & sel_y]
+
+    fill_dummy = np.zeros(len(sensors))
+
+    pd_dict = {}
+    pd_dict['event'   ] = np.full(len(sensors), df.event.unique())
+    pd_dict['time'    ] = np.full(len(sensors), df.time .unique())
+    pd_dict['npeak'   ] = np.full(len(sensors), df.npeak.unique())
+    pd_dict['Xpeak'   ] = np.full(len(sensors), df.Xpeak.unique())
+    pd_dict['Ypeak'   ] = np.full(len(sensors), df.Ypeak.unique())
+    pd_dict['nsipm'   ] = fill_dummy
+    pd_dict['X'       ] = sensors.X
+    pd_dict['Y'       ] = sensors.Y
+    pd_dict['Xrms'    ] = fill_dummy
+    pd_dict['Yrms'    ] = fill_dummy
+    pd_dict['Z'       ] = np.full(len(sensors), df.Z.min())
+    pd_dict['Q'       ] = fill_dummy
+    pd_dict['E'       ] = fill_dummy
+    pd_dict['Qc'      ] = fill_dummy
+    pd_dict['Ec'      ] = fill_dummy
+    pd_dict['track_id'] = fill_dummy
+
+    df2    = pd.DataFrame(pd_dict)
+    df_out = df.merge(df2, on=list(df), how='outer')
+    df_out.drop_duplicates(subset=['X', 'Y'], inplace=True, keep='first')
+    df_out['NormQ'] = df_out.Q / df_out.Q.sum()
+    df_out['nsipm'] = np.full(len(df_out), len(df))
+
+    return df_out


-def hdst_PSF_processing(dsts        : pd.DataFrame,
+def hdst_psf_processing(dsts        : pd.DataFrame,
                         ranges      : List[List[float]],
-                        sampleWidth : List[float]
+                        sampleWidth : List[float],
+                        detector_db : str,
+                        run_number  : int
                         ) -> pd.DataFrame :
     """
     Adds the necessary info to a hits DST to create the PSF, namely the relative position and the normalized Q.
@@ -53,79 +132,16 @@ def hdst_PSF_processing(dsts        : pd.DataFrame,
     ----------
     hdst : hits after processing to create PSF.
     """
-    def AddVariableWeightedMean(df        : pd.DataFrame,
-                                varMean   : str,
-                                varWeight : str
-                                ) -> pd.DataFrame :
-        """
-        Adds the average of a variable weighted by another to a
-        grouped hits DST 'df' (grouped using groupby, by event id).
-        Parameters
-        ----------
-        df        : groupby by event and npeak.
-        varMean   : variable to be averaged.
-        varWeight : variable to be uses as the weight.
-        Returns
-        ----------
-        df : dst with the weighted average.
-        """
-        df[varMean + 'peak'] = (df[varMean]*df[varWeight]).sum()/df[varWeight].sum()
-        return(df)
-
-    def AddEmptySensors(df  : pd.DataFrame,
-                        var : List[str]
-                        ) -> pd.DataFrame :
-        """
-        Adds empty sensors to the hDST
-        Parameters
-        ----------
-        df  : groupby by event and npeak.
-        var : dimensions to be considered.
-        Returns
-        ----------
-        df : dst with empty sipm hits.
-        """
-        distance    = (np.diff(ranges)/2).flatten()
-        means       = [int(df[f'{v}peak'].mean()) for v in var[:len(ranges)]]
-        means       = [mean - mean%sampleWidth[i] for i, mean in enumerate(means)]
-        varrange    = [[means[i] - d, means[i] + d + sampleWidth[i]] for i, d in enumerate(distance)]
-        allbins     = [int(np.diff(rang)/sampleWidth[i]) for i, rang in enumerate(varrange)]
-        Hs, edges   = np.histogramdd(tuple(df[v].values for v in var[:len(ranges)]), bins=allbins, range=varrange, normed=False, weights=df['Q'])
-        interPoints = np.meshgrid(*(shift_to_bin_centers(edge) for edge in edges), indexing='ij')
-        if len(ranges) < 3:
-            interPoints.append(np.array([df['Z'].min()] * len(interPoints[0].flatten())))
-
-        pd_dict1 = {f'{v}'          : interPoints[i].flatten() for i, v in enumerate(var)}
-        pd_dict2 = {f'{var[i]}peak' : [df[f'{var[i]}peak'].mean()] * len(interP.flatten())
-                    for i, interP in enumerate(interPoints[:len(ranges)])}
-
-        pd_dict3 = {}
-        pd_dict3['Q'    ] = Hs.flatten()
-        pd_dict3['NormQ'] = Hs.flatten() / Hs.sum()
-        pd_dict3['E'    ] = pd_dict3['NormQ'] * df['E'].sum()
-        pd_dict3['nsipm'] = [len(df)] * len(Hs.flatten())
-        pd_dict4 = {k : [df[k].min()] * len(interPoints[0].flatten()) for k in ['event', 'time', 'npeak']}
-        pd_dict  = {**pd_dict1, **pd_dict2, **pd_dict3, **pd_dict4}
-
-        return pd.DataFrame(pd_dict).sort_values(['event', 'npeak', 'E'], ascending=[1, 1, 0]).reindex(list(df.columns).append('NormQ'), axis=1)
-
     groupedDST = dsts.groupby(['event', 'npeak'], as_index=False)
+    sipm_db    = load_db.DataSiPM(detector_db, run_number)
     if len(ranges) >= 3:
-        hdst = groupedDST.apply(AddVariableWeightedMean, 'Z', 'E')
-        hdst = hdst.groupby(['event', 'npeak'], as_index=False).apply(AddEmptySensors, ['X', 'Y', 'Z']).reset_index(drop=True)
+        hdst = groupedDST.apply(add_variable_weighted_mean, 'Z', 'E')
+        hdst = hdst.groupby(['event', 'npeak'], as_index=False).apply(add_empty_sensors_and_normalize_q, ['X', 'Y', 'Z'], ranges, sampleWidth, sipm_db).reset_index(drop=True)
         hdst['RelZ'] = hdst.Z - hdst.Zpeak
     else:
-        hdst = groupedDST.apply(AddEmptySensors, ['X', 'Y', 'Z']).reset_index(drop=True)
-        #hdst = dsts
-        #print(hdst)
-        hdst['Zpeak'] = [hdst.Z.min()] * len(hdst)
-        hdst['RelZ' ] = [0           ] * len(hdst)
-
-        hdst._is_copy = False
+        hdst = groupedDST.apply(add_empty_sensors_and_normalize_q, ['X', 'Y', 'Z'], ranges, sampleWidth, sipm_db).reset_index(drop=True)
+        hdst['Zpeak'] = np.full (len(hdst), hdst.Z.min())
+        hdst['RelZ' ] = np.zeros(len(hdst))

     hdst['RelX' ] = hdst.X - hdst.Xpeak
     hdst['RelY' ] = hdst.Y - hdst.Ypeak
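Two of the changes above are easy to sanity-check in isolation: np.average with weights reproduces the old hand-rolled sum(x*w)/sum(w) mean, and the sensor window is a plain cut of half the range width around the peak position. A toy sketch, with invented dataframe values and a local stand-in assumed to mirror IC's in_range helper:

    import numpy as np
    import pandas as pd

    # Toy hits for one (event, npeak) group; the values are invented.
    df = pd.DataFrame({'Z': [10.0, 12.0, 14.0], 'E': [1.0, 2.0, 1.0]})

    old_mean     = (df.Z * df.E).sum() / df.E.sum()    # old nested-helper formula
    mean, weight = df.loc[:, ('Z', 'E')].values.T      # new module-level formula
    assert np.isclose(old_mean, np.average(mean, weights=weight))

    # Sensor selection as in add_empty_sensors_and_normalize_q; this local
    # in_range is assumed to match the IC helper's half-open interval.
    def in_range(x, lo, hi):
        return (lo <= x) & (x < hi)

    database = pd.DataFrame({'X': np.arange(-50.0, 60.0, 10.0)})
    xpeak    = 5.0
    delta_x  = np.diff([-50, 50])[0] / 2
    sel      = in_range(database.X, xpeak - delta_x, xpeak + delta_x)
    print(database[sel].X.values)    # sensors inside the half-range window around the peak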
55 changes: 46 additions & 9 deletions invisible_cities/reco/psf_functions_test.py

@@ -2,32 +2,69 @@
 import numpy as np
 import pandas as pd

-from .. reco.psf_functions import hdst_PSF_processing
-from .. reco.psf_functions import createPSF
+from .. reco.psf_functions import hdst_psf_processing
+from .. reco.psf_functions import add_variable_weighted_mean
+from .. reco.psf_functions import add_empty_sensors_and_normalize_q
+from .. reco.psf_functions import create_psf

-from .. io.dst_io import load_dst
+from .. database           import load_db
+from .. io.dst_io          import load_dst
+from .. core.testing_utils import assert_dataframes_close


-def test_hdst_PSF_processing(ICDATADIR):
+def test_add_variable_weighted_mean(ICDATADIR):
+    PATH_IN   = os.path.join(ICDATADIR, "exact_Kr_tracks_with_MC.h5")
+    PATH_TEST = os.path.join(ICDATADIR, "exact_Kr_tracks_with_MC_psf_means.h5")
+
+    hdst   = load_dst(PATH_IN, 'RECO', 'Events')
+    x_mean = np.average(hdst.loc[:, 'X'], weights=hdst.loc[:, 'E'])
+    y_mean = np.average(hdst.loc[:, 'Y'], weights=hdst.loc[:, 'E'])
+
+    add_variable_weighted_mean(hdst, 'X', 'E')
+    add_variable_weighted_mean(hdst, 'Y', 'E')
+
+    assert np.allclose(x_mean, hdst.Xpeak.unique())
+    assert np.allclose(y_mean, hdst.Ypeak.unique())
+
+    hdst_psf = pd.read_hdf(PATH_TEST)
+    assert_dataframes_close(hdst_psf, hdst)
+
+
+def test_add_empty_sensors_and_normalize_q(ICDATADIR):
+    PATH_IN   = os.path.join(ICDATADIR, "exact_Kr_tracks_with_MC.h5")
+    PATH_TEST = os.path.join(ICDATADIR, "exact_Kr_tracks_with_MC_psf_empty_sensors.h5")
+
+    hdst           = load_dst(PATH_IN, 'RECO', 'Events')
+    group          = hdst.groupby(['event'], as_index=False)
+    sipm_db        = load_db.DataSiPM('new', 0)
+    hdst_processed = group.apply(add_empty_sensors_and_normalize_q, ['X', 'Y'], [[-50, 50], [-50, 50]], [10, 10], sipm_db).reset_index(drop=True)
+    hdst_psf       = pd.read_hdf(PATH_TEST)
+
+    assert hdst_processed.NormQ.sum() == 1.0 * hdst.event.nunique()
+    assert hdst_processed.E.sum()     == hdst.E.sum()
+    assert hdst_processed.Q.sum()     == hdst.Q.sum()
+    assert_dataframes_close(hdst_psf, hdst_processed)
+
+
+def test_hdst_psf_processing(ICDATADIR):
     PATH_IN   = os.path.join(ICDATADIR, "exact_Kr_tracks_with_MC.h5")
     PATH_TEST = os.path.join(ICDATADIR, "exact_Kr_tracks_with_MC_psf.h5")

     hdst           = load_dst(PATH_IN, 'RECO', 'Events')
+    hdst_processed = hdst_psf_processing(hdst, [[-50, 50], [-50, 50]], [10, 10], 'new', 0)
     hdst_psf       = pd.read_hdf(PATH_TEST)

-    hdst_processed = hdst_PSF_processing(hdst, [[-50, 50], [-50, 50]], [10, 10])
-
-    assert hdst_psf.equals(hdst_processed)
+    assert_dataframes_close(hdst_psf, hdst_processed)


-def test_createPSF(ICDATADIR):
+def test_create_psf(ICDATADIR):
     PATH_IN   = os.path.join(ICDATADIR, "exact_Kr_tracks_with_MC_psf.h5")
     PATH_TEST = os.path.join(ICDATADIR, "test_psf.npz")

     hdst = pd.read_hdf(PATH_IN)
     psf  = np.load(PATH_TEST)

-    psf_val, entries, binss = createPSF((hdst.RelX, hdst.RelY), hdst.NormQ, [100, 100], [[-50, 50], [-50, 50]])
+    psf_val, entries, binss = create_psf((hdst.RelX, hdst.RelY), hdst.NormQ, [100, 100], [[-50, 50], [-50, 50]])

     np.testing.assert_allclose(psf['psf'    ], psf_val)
     np.testing.assert_allclose(psf['entries'], entries)
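The commit also refreshes the binary reference invisible_cities/database/test_data/test_psf.npz. A sketch of how such a reference can be regenerated with np.savez: the 'psf' and 'entries' keys appear in the assertions above, while the 'bins' key name and the file paths are assumptions:

    import numpy  as np
    import pandas as pd

    from invisible_cities.reco.psf_functions import create_psf

    hdst = pd.read_hdf("exact_Kr_tracks_with_MC_psf.h5")    # input path assumed
    psf_val, entries, binss = create_psf((hdst.RelX, hdst.RelY), hdst.NormQ,
                                         [100, 100], [[-50, 50], [-50, 50]])
    # 'psf' and 'entries' match the test's keys; 'bins' is an assumed name.
    np.savez("test_psf.npz", psf=psf_val, entries=entries, bins=binss)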
