diff --git a/invisible_cities/core/fit_functions.py b/invisible_cities/core/fit_functions.py index 9032adfaa..debdab8b9 100644 --- a/invisible_cities/core/fit_functions.py +++ b/invisible_cities/core/fit_functions.py @@ -157,8 +157,10 @@ def fit(func, x, y, seed=(), fit_range=None, **kwargs): Returns ------- - fitted_fun : extended function (contains values and errors) - Fitted function. + fitted_fun : FitFunction + Extended function containing fit parameters (fitf, vals, errors, chi2, pval, + cov) and full_output from curve_fit and leastsq (infodict, mesg and ier). + Examples -------- @@ -183,7 +185,7 @@ def fit(func, x, y, seed=(), fit_range=None, **kwargs): kwargs['absolute_sigma'] = "sigma" in kwargs - vals, cov = scipy.optimize.curve_fit(func, x, y, seed, **kwargs) + vals, cov, infodict, mesg, ier = scipy.optimize.curve_fit(func, x, y, seed, full_output=True, **kwargs) fitf = lambda x: func(x, *vals) fitx = fitf(x) @@ -191,8 +193,7 @@ def fit(func, x, y, seed=(), fit_range=None, **kwargs): ndof = len(y) - len(vals) chi2, pval = get_chi2_and_pvalue(y, fitx, ndof, sigma_r) - - return FitFunction(fitf, vals, errors, chi2, pval, cov) + return FitFunction(fitf, vals, errors, chi2, pval, cov, infodict, mesg, ier) def profileX(xdata, ydata, nbins=100, diff --git a/invisible_cities/evm/ic_containers.py b/invisible_cities/evm/ic_containers.py index 39eb0ff89..d49e249f2 100644 --- a/invisible_cities/evm/ic_containers.py +++ b/invisible_cities/evm/ic_containers.py @@ -25,7 +25,7 @@ def _add_namedtuple_in_this_module(name, attribute_names): ('CalibVectors' , 'channel_id coeff_blr coeff_c adc_to_pes adc_to_pes_sipm pmt_active'), ('S12Params' , 'time stride length rebin_stride'), ('ZsWf' , 'indices energies'), - ('FitFunction' , 'fn values errors chi2 pvalue cov'), + ('FitFunction' , 'fn values errors chi2 pvalue cov infodict mesg ier'), ('TriggerParams' , 'trigger_channels min_number_channels charge height width'), ('SensorParams' , 'spectra peak_range min_bin_peak max_bin_peak half_peak_width p1pe_seed lim_ped'), ('PedestalParams' , 'gain gain_min gain_max sigma sigma_min sigma_max')): diff --git a/invisible_cities/reco/krmap_functions.py b/invisible_cities/reco/krmap_functions.py new file mode 100644 index 000000000..a967041fd --- /dev/null +++ b/invisible_cities/reco/krmap_functions.py @@ -0,0 +1,151 @@ +import numpy as np +import pandas as pd + +from .. types.symbols import KrFitFunction +from .. evm.ic_containers import FitFunction +from .. core.fit_functions import polynom, expo + + +def lin_seed(x : np.array, y : np.array): + ''' + Estimate the seed for a linear fit of the form y = a + bx. + + Parameters + ---------- + x : np.array + Independent variable. + y : np.array + Dependent variable. + + Returns + ------- + seed : tuple + Seed parameters (intercept, slope) for the linear fit. + ''' + x0, x1 = x.min(), x.max() + y0, y1 = y.min(), y.max() + + if x1 == x0: # If same x value, set slope to 0 and use the mean value of y as interceipt + b = 0 + a = y.mean() + else: + b = (y1 - y0) / (x1 - x0) + a = y0 - b * x0 + + return a, b + + +def expo_seed(x : np.array, y : np.array): + ''' + Estimate the seed for an exponential fit of the form y = y0*exp(-x/lt). + + Parameters + ---------- + x : np.array + Independent variable. + y : np.array + Dependent variable. + + Returns + ------- + seed : tuple + Seed parameters (y0, lt) for the exponential fit. + ''' + x, y = zip(*sorted(zip(x, y))) + y0 = y[0] + + if y0 <= 0 or y[-1] <= 0: + raise ValueError("y data must be > 0") + + lt = -x[-1] / np.log(y[-1] / y0) + + return y0, lt + + +def select_fit_variables(fittype : KrFitFunction, dst : pd.DataFrame): + ''' + Select the data for fitting based on the specified fit type. + + Parameters + ---------- + fittype : KrFitFunction + The type of fit function to prepare data for (e.g., linear, exponential, log-linear). + dst : pd.DataFrame + The DataFrame containing the data to be prepared for fitting. + + Returns + ------- + x_data : pd.Series + The independent variable data prepared for fitting. + y_data : pd.Series + The dependent variable data prepared for fitting. + ''' + if fittype is KrFitFunction.linear : return dst.DT, dst.S2e + elif fittype is KrFitFunction.expo : return dst.DT, dst.S2e + elif fittype is KrFitFunction.log_lin: return dst.DT, -np.log(dst.S2e) + + +def get_function_and_seed_lt(fittype : KrFitFunction): + ''' + Retrieve the fitting function and seed function based on the + specified fittype. + + Parameters + ---------- + fittype : KrFitFunction + The type of fit function to retrieve (e.g., linear, exponential, log-linear). + + Returns + ------- + fit_function : function + The fitting function corresponding to the specified fit type. + seed_function : function + The seed function corresponding to the specified fit type. + ''' + linear_function = lambda x, y0, slope: polynom(x, y0, slope) + expo_function = lambda x, e0, lt: expo (x, e0, -lt) + + if fittype is KrFitFunction.linear: return linear_function, lin_seed + elif fittype is KrFitFunction.log_lin: return linear_function, lin_seed + elif fittype is KrFitFunction.expo: return expo_function, expo_seed + + +def transform_parameters(fit_output : FitFunction): + ''' + Transform the parameters obtained from the fitting output into EO and LT. + When using log_lin fit, we need to convert the intermediate variables into + the actual physical magnitudes involved in the process. + + Parameters + ---------- + fit_output : FitFunction + Output from IC's fit containing the parameter values, errors, and + covariance matrix. + + Returns + ------- + par : list + Transformed parameter values. + err : list + Transformed parameter errors. + cov : float + Transformed covariance value. + ''' + par = fit_output.values + err = fit_output.errors + cov = fit_output.cov[0, 1] + + a, b = par + u_a, u_b = err + + E0 = np.exp(-a) + s_E0 = np.abs(E0 * u_a) + lt = 1 / b + s_lt = np.abs(lt**2 * u_b) + cov = E0 * lt**2 * cov # Not sure about this + + par = [ E0, lt] + err = [s_E0, s_lt] + + return par, err, cov + diff --git a/invisible_cities/reco/krmap_functions_test.py b/invisible_cities/reco/krmap_functions_test.py new file mode 100644 index 000000000..fcb5cbd34 --- /dev/null +++ b/invisible_cities/reco/krmap_functions_test.py @@ -0,0 +1,106 @@ +import pytest + +import numpy as np +import numpy.testing as npt +import pandas as pd +import scipy.optimize as so + +from hypothesis import given, settings +from hypothesis.strategies import floats, integers + +from .. reco import krmap_functions as krf +from ..types.symbols import KrFitFunction +from .. evm.ic_containers import FitFunction +from .. core.fit_functions import expo + + +@given(floats(min_value = 0, max_value = 10), + floats(min_value = 10, max_value = 20), + floats(min_value = 1, max_value = 100), + floats(min_value = 0, max_value = 10)) +def test_lin_function_output_values(x_min, x_max, a, b): + x = np.array([x_min, x_max]) + y = a + b * x + a_test, b_test = krf.lin_seed(x, y) + + assert np.isclose(a_test, a) + assert np.isclose(b_test, b) + + +@given(floats(min_value = 1, max_value = 10), + floats(min_value = 1000, max_value = 1600), + floats(min_value = 1e4, max_value = 1e5), + floats(min_value = 1e4, max_value = 1e5)) +def test_expo_seed_output_values(zmin, zmax, elt, e0): + x = np.array([zmin, zmax]) + y = e0 * np.exp(-x / elt) + e0_test, elt_test = krf.expo_seed(x, y) + + assert np.isclose( e0_test, e0, rtol=0.1) + assert np.isclose(elt_test, elt, rtol=0.1) + + +@pytest.fixture +def sample_df(): + data = {'DT' : [10, 20, 30, 40, 50], + 'S2e': [50, 45, 42, 41, 41]} + return pd.DataFrame(data) + + +def test_select_fit_variables(sample_df): + x_linear, y_linear = krf.select_fit_variables(KrFitFunction.linear, sample_df) + x_expo, y_expo = krf.select_fit_variables(KrFitFunction.expo, sample_df) + x_log_lin, y_log_lin = krf.select_fit_variables(KrFitFunction.log_lin, sample_df) + + # First return the same for the 3 cases + assert (x_expo == x_linear).all() + assert (x_log_lin == x_linear).all() + + # Second return different for log_lin case + assert (y_linear == y_expo).all() + assert (y_log_lin != y_expo).all() + + +@settings(deadline=None) +@given(floats (min_value = 1, max_value = 10), + floats (min_value = 1000, max_value = 1600), + integers(min_value = 10, max_value = 1e3), + floats (min_value = 1e3, max_value = 1e5), + floats (min_value = 5e3, max_value = 1e5)) +def test_get_function_and_seed_lt_with_data(x_min, x_max, steps, e0, lt): + x = np.linspace(x_min, x_max, steps) + y = expo(x, e0, lt) + + fit_func_lin, seed_func_lin = krf.get_function_and_seed_lt(KrFitFunction.linear) + fit_func_expo, seed_func_expo = krf.get_function_and_seed_lt(KrFitFunction.expo) + fit_func_log_lin, seed_func_log_lin = krf.get_function_and_seed_lt(KrFitFunction.log_lin) + + popt_lin, _ = so.curve_fit(fit_func_lin, x, y, p0=seed_func_lin (x, y)) + popt_expo, _ = so.curve_fit(fit_func_expo, x, y, p0=seed_func_expo (x, y)) + popt_log_lin, _ = so.curve_fit(fit_func_log_lin, x, y, p0=seed_func_log_lin(x, y)) + + assert np.isclose(popt_lin[0], popt_expo[0], rtol=1e-1) # The interceipt should be close between lin and expo + assert not np.isclose(popt_lin[1], popt_expo[1], rtol=1e-1) # The "lifetime" should be different between lin and expo + + assert np.isclose(popt_lin[0], popt_log_lin[0], rtol=1e-10) # The lin and log_lin are the same (the only difference is their + assert np.isclose(popt_lin[1], popt_log_lin[1], rtol=1e-10) # inputs: s2e or -log(s2e)) so both parameters should be the same + # for the purpose of testing this function + +@given(floats(min_value = 1, max_value = 1e5), + floats(min_value = 10, max_value = 1e5)) +def test_transform_parameters(a, b): + errors = 0.2*np.array([a, b]) + fit_output = FitFunction(values=[a, b], errors=errors, cov=np.array([[0.04, 0.02], [0.02, 0.04]]), + fn=None, chi2=None, pvalue=None, infodict=None, mesg=None, ier=None) + + transformed_par, transformed_err, transformed_cov = krf.transform_parameters(fit_output) + + E0_expected = np.exp(-a) + s_E0_expected = np.abs(E0_expected * errors[0]) + lt_expected = 1 / b + s_lt_expected = np.abs(lt_expected**2 * errors[1]) + cov_expected = E0_expected * lt_expected**2 * 0.02 + + npt.assert_allclose(transformed_par, [ E0_expected, lt_expected]) + npt.assert_allclose(transformed_err, [s_E0_expected, s_lt_expected]) + assert np.isclose(transformed_cov, cov_expected) diff --git a/invisible_cities/types/symbols.py b/invisible_cities/types/symbols.py index cbd0b9628..cd25b91c1 100644 --- a/invisible_cities/types/symbols.py +++ b/invisible_cities/types/symbols.py @@ -53,6 +53,12 @@ class InterpolationMethod(AutoNameEnumBase): nointerpolation = auto() +class KrFitFunction(AutoNameEnumBase): + expo = auto() + linear = auto() + log_lin = auto() + + class MCTableType(AutoNameEnumBase): configuration = auto() events = auto()