Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fit functions for map computation #872

Merged
merged 17 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions invisible_cities/core/fit_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,10 @@ def fit(func, x, y, seed=(), fit_range=None, **kwargs):

Returns
-------
fitted_fun : extended function (contains values and errors)
Fitted function.
fitted_fun : FitFunction
Extended function containing fit parameters (fitf, vals, errors, chi2, pval,
cov) and full_output from curve_fit and leastsq (infodict, mesg and ier).


Examples
--------
Expand All @@ -183,16 +185,15 @@ def fit(func, x, y, seed=(), fit_range=None, **kwargs):

kwargs['absolute_sigma'] = "sigma" in kwargs

vals, cov = scipy.optimize.curve_fit(func, x, y, seed, **kwargs)
vals, cov, infodict, mesg, ier = scipy.optimize.curve_fit(func, x, y, seed, full_output=True, **kwargs)

fitf = lambda x: func(x, *vals)
fitx = fitf(x)
errors = get_errors(cov)
ndof = len(y) - len(vals)
chi2, pval = get_chi2_and_pvalue(y, fitx, ndof, sigma_r)


return FitFunction(fitf, vals, errors, chi2, pval, cov)
return FitFunction(fitf, vals, errors, chi2, pval, cov, infodict, mesg, ier)


def profileX(xdata, ydata, nbins=100,
Expand Down
2 changes: 1 addition & 1 deletion invisible_cities/evm/ic_containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def _add_namedtuple_in_this_module(name, attribute_names):
('CalibVectors' , 'channel_id coeff_blr coeff_c adc_to_pes adc_to_pes_sipm pmt_active'),
('S12Params' , 'time stride length rebin_stride'),
('ZsWf' , 'indices energies'),
('FitFunction' , 'fn values errors chi2 pvalue cov'),
('FitFunction' , 'fn values errors chi2 pvalue cov infodict mesg ier'),
('TriggerParams' , 'trigger_channels min_number_channels charge height width'),
('SensorParams' , 'spectra peak_range min_bin_peak max_bin_peak half_peak_width p1pe_seed lim_ped'),
('PedestalParams' , 'gain gain_min gain_max sigma sigma_min sigma_max')):
Expand Down
151 changes: 151 additions & 0 deletions invisible_cities/reco/krmap_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import numpy as np
import pandas as pd

from .. types.symbols import KrFitFunction
from .. evm.ic_containers import FitFunction
from .. core.fit_functions import polynom, expo


def lin_seed(x : np.array, y : np.array):
'''
Estimate the seed for a linear fit of the form y = a + bx.

Parameters
----------
x : np.array
Independent variable.
y : np.array
Dependent variable.

Returns
-------
seed : tuple
Seed parameters (intercept, slope) for the linear fit.
'''
x0, x1 = x.min(), x.max()
y0, y1 = y.min(), y.max()

if x1 == x0: # If same x value, set slope to 0 and use the mean value of y as interceipt
b = 0
a = y.mean()
else:
b = (y1 - y0) / (x1 - x0)
a = y0 - b * x0

return a, b


def expo_seed(x : np.array, y : np.array):
'''
Estimate the seed for an exponential fit of the form y = y0*exp(-x/lt).

Parameters
----------
x : np.array
Independent variable.
y : np.array
Dependent variable.

Returns
-------
seed : tuple
Seed parameters (y0, lt) for the exponential fit.
'''
x, y = zip(*sorted(zip(x, y)))
y0 = y[0]

if y0 <= 0 or y[-1] <= 0:
raise ValueError("y data must be > 0")

lt = -x[-1] / np.log(y[-1] / y0)

return y0, lt


def select_fit_variables(fittype : KrFitFunction, dst : pd.DataFrame):
'''
Select the data for fitting based on the specified fit type.

Parameters
----------
fittype : KrFitFunction
The type of fit function to prepare data for (e.g., linear, exponential, log-linear).
dst : pd.DataFrame
The DataFrame containing the data to be prepared for fitting.

Returns
-------
x_data : pd.Series
The independent variable data prepared for fitting.
y_data : pd.Series
The dependent variable data prepared for fitting.
'''
if fittype is KrFitFunction.linear : return dst.DT, dst.S2e
elif fittype is KrFitFunction.expo : return dst.DT, dst.S2e
elif fittype is KrFitFunction.log_lin: return dst.DT, -np.log(dst.S2e)


def get_function_and_seed_lt(fittype : KrFitFunction):
'''
Retrieve the fitting function and seed function based on the
specified fittype.

Parameters
----------
fittype : KrFitFunction
The type of fit function to retrieve (e.g., linear, exponential, log-linear).

Returns
-------
fit_function : function
The fitting function corresponding to the specified fit type.
seed_function : function
The seed function corresponding to the specified fit type.
'''
linear_function = lambda x, y0, slope: polynom(x, y0, slope)
expo_function = lambda x, e0, lt: expo (x, e0, -lt)

if fittype is KrFitFunction.linear: return linear_function, lin_seed
elif fittype is KrFitFunction.log_lin: return linear_function, lin_seed
elif fittype is KrFitFunction.expo: return expo_function, expo_seed


def transform_parameters(fit_output : FitFunction):
'''
Transform the parameters obtained from the fitting output into EO and LT.
When using log_lin fit, we need to convert the intermediate variables into
the actual physical magnitudes involved in the process.

Parameters
----------
fit_output : FitFunction
Output from IC's fit containing the parameter values, errors, and
covariance matrix.

Returns
-------
par : list
Transformed parameter values.
err : list
Transformed parameter errors.
cov : float
Transformed covariance value.
'''
par = fit_output.values
err = fit_output.errors
cov = fit_output.cov[0, 1]

a, b = par
u_a, u_b = err

E0 = np.exp(-a)
s_E0 = np.abs(E0 * u_a)
lt = 1 / b
s_lt = np.abs(lt**2 * u_b)
cov = E0 * lt**2 * cov # Not sure about this

par = [ E0, lt]
err = [s_E0, s_lt]

return par, err, cov

106 changes: 106 additions & 0 deletions invisible_cities/reco/krmap_functions_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import pytest

import numpy as np
import numpy.testing as npt
import pandas as pd
import scipy.optimize as so

from hypothesis import given, settings
from hypothesis.strategies import floats, integers

from .. reco import krmap_functions as krf
from ..types.symbols import KrFitFunction
from .. evm.ic_containers import FitFunction
from .. core.fit_functions import expo


@given(floats(min_value = 0, max_value = 10),
floats(min_value = 10, max_value = 20),
floats(min_value = 1, max_value = 100),
floats(min_value = 0, max_value = 10))
def test_lin_function_output_values(x_min, x_max, a, b):
x = np.array([x_min, x_max])
y = a + b * x
a_test, b_test = krf.lin_seed(x, y)

assert np.isclose(a_test, a)
assert np.isclose(b_test, b)


@given(floats(min_value = 1, max_value = 10),
floats(min_value = 1000, max_value = 1600),
floats(min_value = 1e4, max_value = 1e5),
floats(min_value = 1e4, max_value = 1e5))
def test_expo_seed_output_values(zmin, zmax, elt, e0):
x = np.array([zmin, zmax])
y = e0 * np.exp(-x / elt)
e0_test, elt_test = krf.expo_seed(x, y)

assert np.isclose( e0_test, e0, rtol=0.1)
assert np.isclose(elt_test, elt, rtol=0.1)


@pytest.fixture
def sample_df():
data = {'DT' : [10, 20, 30, 40, 50],
'S2e': [50, 45, 42, 41, 41]}
return pd.DataFrame(data)


def test_select_fit_variables(sample_df):
x_linear, y_linear = krf.select_fit_variables(KrFitFunction.linear, sample_df)
x_expo, y_expo = krf.select_fit_variables(KrFitFunction.expo, sample_df)
x_log_lin, y_log_lin = krf.select_fit_variables(KrFitFunction.log_lin, sample_df)

# First return the same for the 3 cases
assert (x_expo == x_linear).all()
assert (x_log_lin == x_linear).all()

# Second return different for log_lin case
assert (y_linear == y_expo).all()
assert (y_log_lin != y_expo).all()


@settings(deadline=None)
@given(floats (min_value = 1, max_value = 10),
floats (min_value = 1000, max_value = 1600),
integers(min_value = 10, max_value = 1e3),
floats (min_value = 1e3, max_value = 1e5),
floats (min_value = 5e3, max_value = 1e5))
def test_get_function_and_seed_lt_with_data(x_min, x_max, steps, e0, lt):
x = np.linspace(x_min, x_max, steps)
y = expo(x, e0, lt)

fit_func_lin, seed_func_lin = krf.get_function_and_seed_lt(KrFitFunction.linear)
fit_func_expo, seed_func_expo = krf.get_function_and_seed_lt(KrFitFunction.expo)
fit_func_log_lin, seed_func_log_lin = krf.get_function_and_seed_lt(KrFitFunction.log_lin)

popt_lin, _ = so.curve_fit(fit_func_lin, x, y, p0=seed_func_lin (x, y))
popt_expo, _ = so.curve_fit(fit_func_expo, x, y, p0=seed_func_expo (x, y))
popt_log_lin, _ = so.curve_fit(fit_func_log_lin, x, y, p0=seed_func_log_lin(x, y))

assert np.isclose(popt_lin[0], popt_expo[0], rtol=1e-1) # The interceipt should be close between lin and expo
assert not np.isclose(popt_lin[1], popt_expo[1], rtol=1e-1) # The "lifetime" should be different between lin and expo

assert np.isclose(popt_lin[0], popt_log_lin[0], rtol=1e-10) # The lin and log_lin are the same (the only difference is their
assert np.isclose(popt_lin[1], popt_log_lin[1], rtol=1e-10) # inputs: s2e or -log(s2e)) so both parameters should be the same
# for the purpose of testing this function

@given(floats(min_value = 1, max_value = 1e5),
floats(min_value = 10, max_value = 1e5))
def test_transform_parameters(a, b):
errors = 0.2*np.array([a, b])
fit_output = FitFunction(values=[a, b], errors=errors, cov=np.array([[0.04, 0.02], [0.02, 0.04]]),
fn=None, chi2=None, pvalue=None, infodict=None, mesg=None, ier=None)

transformed_par, transformed_err, transformed_cov = krf.transform_parameters(fit_output)

E0_expected = np.exp(-a)
s_E0_expected = np.abs(E0_expected * errors[0])
lt_expected = 1 / b
s_lt_expected = np.abs(lt_expected**2 * errors[1])
cov_expected = E0_expected * lt_expected**2 * 0.02

npt.assert_allclose(transformed_par, [ E0_expected, lt_expected])
npt.assert_allclose(transformed_err, [s_E0_expected, s_lt_expected])
assert np.isclose(transformed_cov, cov_expected)
6 changes: 6 additions & 0 deletions invisible_cities/types/symbols.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ class InterpolationMethod(AutoNameEnumBase):
nointerpolation = auto()


class KrFitFunction(AutoNameEnumBase):
expo = auto()
linear = auto()
log_lin = auto()


class MCTableType(AutoNameEnumBase):
configuration = auto()
events = auto()
Expand Down
Loading