From f6c4a00f801e217d227d8c93f0775423a5c7d5fe Mon Sep 17 00:00:00 2001 From: Lingfeng Wei Date: Tue, 30 Apr 2024 18:13:18 -0700 Subject: [PATCH 1/4] Add analytical velocity fit; Add controls over using scipy.curve_fit or analytical velocity fit, and show progress; Fixed the is/is not statement --- flystar/archive_io.py | 18 ++++ flystar/fit_velocity.py | 158 ++++++++++++++++++++++++++++++ flystar/match.py | 6 +- flystar/plots.py | 6 +- flystar/startables.py | 207 ++++++++++++++++++++++++++-------------- 5 files changed, 316 insertions(+), 79 deletions(-) create mode 100755 flystar/archive_io.py create mode 100755 flystar/fit_velocity.py diff --git a/flystar/archive_io.py b/flystar/archive_io.py new file mode 100755 index 0000000..88de5cb --- /dev/null +++ b/flystar/archive_io.py @@ -0,0 +1,18 @@ +import pickle + +# Need to add these functions to a utility .py file rather than storing them in general structure. +def open_archive(file_name): + """ + Helper function to open archived files. + """ + with open(file_name, 'rb') as file_archive: + file_dict = pickle.load(file_archive) + return file_dict + +def save_archive(file_name, save_data): + """ + Helper function to archive a file. + """ + with open(file_name, 'wb') as outfile: + pickle.dump(save_data, outfile, protocol=pickle.HIGHEST_PROTOCOL) + return \ No newline at end of file diff --git a/flystar/fit_velocity.py b/flystar/fit_velocity.py new file mode 100755 index 0000000..0c83b99 --- /dev/null +++ b/flystar/fit_velocity.py @@ -0,0 +1,158 @@ +import tqdm +import numpy as np +import pandas as pd + +def linear(x, k, b): + return k*x + b + +def linear_fit(x, y, sigma=None, absolute_sigma=True): + """Weighted linear regression (See https://en.wikipedia.org/wiki/Weighted_least_squares#Solution). Recommended for low-dimension, non-degenerate data. Otherwise, please use scipy.curve_fit. + + Parameters + ---------- + x : array-like + x data + y : array-like + y data + sigma : array-like, optional + Weighted by 1/sigma**2. If not provided, weight = 1, by default None + absolute_sigma : bool, optional + If True (default), sigma is used in an absolute sense and the estimated parameter uncertainty reflects these absolute values. If False, only the relative magnitudes of the sigma values matter, by default True + + Returns + ------- + result : dictionary + Dictionary with keys 'slope', 'e_slope', 'intercept', 'e_intercept', and 'chi2' if return_chi2=True. + """ + x = np.array(x) + y = np.array(y) + if sigma is None: + sigma = np.ones_like(x) + else: + sigma = np.array(sigma) + + X = np.vander(x, 2) + W = np.diag(1/sigma**2) + XTWX = X.T @ W @ X + pcov = np.linalg.inv(XTWX) # Covariance Matrix + popt = pcov @ X.T @ W @ y # Linear Solution + perr = np.sqrt(np.diag(pcov)) # Uncertainty of Linear Solution + + residual = y - X @ popt + chi2 = residual.T @ W @ residual + + if not absolute_sigma: + reduced_chi2 = chi2/(len(x) - 2) + perr *= reduced_chi2**0.5 + + result = { + 'slope': popt[0], + 'intercept': popt[1], + 'e_slope': perr[0], + 'e_intercept': perr[1], + 'chi2': chi2 + } + + return result + + +def calc_chi2(x, y, sigma, slope, intercept): + popt = np.array([slope, intercept]) + X = np.vander(x, 2) + W = np.diag(1/sigma**2) + residual = y - X @ popt + return residual.T @ W @ residual + + +def fit_velocity(startable, weighting='var', absolute_sigma=True, epoch_cols=None): + """Fit proper motion with weighted linear regression equations (see https://en.wikipedia.org/wiki/Weighted_least_squares#Solution). + + Parameters + ---------- + startable : StarTable + StarTable object + weighting : str, optional + Weighting by variance (1/ye**2) or standard deviation (1/ye), by default 'var' + epoch_cols : list, optional + Choose which columns to use. If None, use all columns, by default None + + Returns + ------- + result : pd.DataFrame + Proper motion dataframe with keys vx, vxe, vy, vye, x0, x0e, y0, y0e + + Raises + ------ + ValueError + If weighting is neither 'std' nor 'var' + """ + if weighting not in ['std', 'var']: + raise ValueError(f"Weighting must be either 'std' or 'var', not '{weighting}'.") + if epoch_cols is None: + epoch_cols = np.arange(len(startable.meta['YEARS'])) # use all cols if not specified + + N = len(startable) + x0 = np.zeros(N) + x0e = np.zeros(N) + y0 = np.zeros(N) + y0e = np.zeros(N) + + vx = np.zeros(N) + vxe = np.zeros(N) + vy = np.zeros(N) + vye = np.zeros(N) + + time = np.array(startable.meta['YEARS'])[epoch_cols] + + for i in tqdm(range(len(startable))): + x = startable['x'][i, epoch_cols] + y = startable['y'][i, epoch_cols] + xe = startable['xe'][i, epoch_cols] + ye = startable['ye'][i, epoch_cols] + + t_weight = 1. / np.hypot(xe, ye) + t0 = np.average(time, weights=t_weight) + dt = time - t0 + + if weighting == 'var': + vx_result = linear_fit(dt, x, xe, absolute_sigma=absolute_sigma) + elif weighting == 'std': + vx_result = linear_fit(dt, x, np.abs(xe)**0.5) + vx[i] = vx_result['slope'] + vxe[i] = vx_result['e_slope'] + x0[i] = vx_result['intercept'] + x0e[i] = vx_result['e_intercept'] + + if weighting == 'var': + vy_result = linear_fit(dt, y, ye) + elif weighting == 'std': + vy_result = linear_fit(dt, y, np.abs(ye)**0.5, absolute_sigma=absolute_sigma) + vy[i] = vy_result['slope'] + vye[i] = vy_result['e_slope'] + y0[i] = vy_result['intercept'] + y0e[i] = vy_result['e_intercept'] + + result = pd.DataFrame({ + 'vx': vx, 'vxe': vxe, + 'vy': vy, 'vye': vye, + 'x0': x0, 'x0e': x0e, + 'y0': y0, 'y0e': y0e + }) + return result + + +# Test +if __name__=='__main__': + from scipy.optimize import curve_fit + + x = np.array([1,2,3,4]) + y = np.array([1,2,5,6]) + sigma = np.array([.4,.2,.1,.3]) + + for absolute_sigma in [True, False]: + result = linear_fit(x, y, sigma=sigma, absolute_sigma=absolute_sigma) + popt, pcov = curve_fit(linear, x, y, sigma=sigma, absolute_sigma=absolute_sigma) + perr = np.sqrt(np.diag(pcov)) + print(f'Absolute Sigma = {absolute_sigma}') + print(f"linear_fit: slope = {result['slope']:.3f} ± {result['e_slope']:.3f}, intercept = {result['intercept']:.3f} ± {result['e_intercept']:.3f}, chi2={result['chi2']:.3f}") + print(f'curve_fit: slope = {popt[0]:.3f} ± {perr[0]:.3f}, intercept = {popt[1]:.3f} ± {perr[1]:.3f}, chi2={calc_chi2(x, y, sigma, *popt):.3f}\n') \ No newline at end of file diff --git a/flystar/match.py b/flystar/match.py index 67a04f3..d40ccdb 100644 --- a/flystar/match.py +++ b/flystar/match.py @@ -531,7 +531,7 @@ def generic_match(sl1, sl2, init_mode='triangle', raise TypeError("The second catalog has to be a StarList") # Find the initial transformation - if init_mode is 'triangle': # Blind triangles method + if init_mode == 'triangle': # Blind triangles method # Prepare the reduced starlists for matching sl1_cut = copy.deepcopy(sl1) @@ -548,13 +548,13 @@ def generic_match(sl1, sl2, init_mode='triangle', transf = align.initial_align(sl1_cut, sl2_cut, briteN=n_bright, transformModel=model, order=order_dr[0]) #order_dr[i_loop][0] ? - elif init_mode is 'match_name': # Name match + elif init_mode == 'match_name': # Name match sl1_idx_init, sl2_idx_init, _ = starlists.restrict_by_name(sl1, sl2) transf = model(sl2['x'][sl2_idx_init], sl2['y'][sl2_idx_init], sl1['x'][sl1_idx_init], sl1['y'][sl1_idx_init], order=int(order_dr[0][0])) - elif init_mode is 'load': # Load a transformation file + elif init_mode == 'load': # Load a transformation file transf = transforms.Transform2D.from_file(kwargs['transf_file']) else: # None of the above diff --git a/flystar/plots.py b/flystar/plots.py index 7abde6e..672d075 100755 --- a/flystar/plots.py +++ b/flystar/plots.py @@ -12,6 +12,7 @@ import pdb import math import astropy +from astropy.table import Table from astropy.io import ascii #################################################### @@ -2156,13 +2157,12 @@ def plot_chi2_dist(tab, Ndetect, xlim=40, n_bins=50): chi2_y_list = [] fnd_list = [] # Number of non-NaN error measurements - for ii in range(len(tab['xe'])): + for ii in range(len(tab)): # Ignore the NaNs fnd = np.argwhere(~np.isnan(tab['xe'][ii,:])) # fnd = np.where(tab['xe'][ii, :] > 0)[0] fnd_list.append(len(fnd)) - time = tab['t'][ii, fnd] x = tab['x'][ii, fnd] y = tab['y'][ii, fnd] xerr = tab['xe'][ii, fnd] @@ -3310,7 +3310,7 @@ def plot_sky(stars_tab, label=label, picker=4) #for legend - if label is not '_nolegend_': + if label != '_nolegend_': line.set_label(str(label)) epochs_legend.append(line) diff --git a/flystar/startables.py b/flystar/startables.py index 78fa270..839a1d6 100644 --- a/flystar/startables.py +++ b/flystar/startables.py @@ -1,6 +1,7 @@ from astropy.table import Table, Column from astropy.stats import sigma_clipping from scipy.optimize import curve_fit +from flystar.fit_velocity import linear_fit, calc_chi2, linear from tqdm import tqdm import numpy as np import warnings @@ -527,16 +528,43 @@ def detections(self): return - def fit_velocities(self, weighting='var', bootstrap=0, fixed_t0=False, verbose=False, - mask_val=None, mask_lists=False): - """ - Fit velocities for all stars in the table. + def fit_velocities(self, weighting='var', use_scipy=True, absolute_sigma=True, bootstrap=0, fixed_t0=False, verbose=False, + mask_val=None, mask_lists=False, show_progress=True): + """Fit velocities for all stars in the table and add to the columns 'vx', 'vxe', 'vy', 'vye', 'x0', 'x0e', 'y0', 'y0e'. + + Parameters + ---------- + weighting : str, optional + Weight by variance 'var' or standard deviation 'std', by default 'var' + use_scipy : bool, optional + Use scipy.curve_fit (recommended for large number of epochs, but may return inf or nan) or analytic fitting from flystar.fit_velocity.linear_fit (recommended for a few epochs), by default True + absolute_sigma : bool, optional + Absolute sigma or not. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html for details, by default True + bootstrap : int, optional + Calculate uncertain using bootstraping or not, by default 0 + fixed_t0 : bool or array-like, optional + Fix the t0 in dt = time - t0 if user provides an array with the same length of the table, or automatically calculate t0 = np.average(time, weights=1/np.hypot(xe, ye)) if False, by default False + verbose : bool, optional + Output verbose information or not, by default False + mask_val : float, optional + Value that needs to be masked in the data, e.g. -100000, by default None + mask_lists : list, optional + Columns that needs to be masked, by default False + show_progress : bool, optional + Show progress bar or not, by default True + + Raises + ------ + ValueError + If weighting is neither 'var' or 'std' + KeyError + If there's not time information in the table """ if weighting not in ['var', 'std']: - raise ValueError(f"Weighting must either be 'var' or 'std', not {weighting}!") + raise ValueError(f"fit_velocities: Weighting must either be 'var' or 'std', not {weighting}!") if ('t' not in self.colnames) and ('list_times' not in self.meta): - raise RuntimeError('fit_velocities: Failed to time values.') + raise KeyError("fit_velocities: Failed to time values. No 't' column in table, no 'list_times' in meta.") N_stars, N_epochs = self['x'].shape @@ -562,12 +590,15 @@ def fit_velocities(self, weighting='var', bootstrap=0, fixed_t0=False, verbose=F self.add_column(Column(data = np.zeros(N_stars, dtype=float), name = 'vx')) self.add_column(Column(data = np.zeros(N_stars, dtype=float), name = 'y0')) self.add_column(Column(data = np.zeros(N_stars, dtype=float), name = 'vy')) - + self.add_column(Column(data = np.zeros(N_stars, dtype=float), name = 'x0e')) self.add_column(Column(data = np.zeros(N_stars, dtype=float), name = 'vxe')) self.add_column(Column(data = np.zeros(N_stars, dtype=float), name = 'y0e')) self.add_column(Column(data = np.zeros(N_stars, dtype=float), name = 'vye')) + self.add_column(Column(data = np.zeros(N_stars, dtype=float), name = 'chi2_vx')) + self.add_column(Column(data = np.zeros(N_stars, dtype=float), name = 'chi2_vy')) + self.add_column(Column(data = np.zeros(N_stars, dtype=float), name = 't0')) self.add_column(Column(data = np.zeros(N_stars, dtype=int), name = 'n_vfit')) @@ -595,21 +626,22 @@ def fit_velocities(self, weighting='var', bootstrap=0, fixed_t0=False, verbose=F # STARS LOOP through the stars and work on them 1 at a time. # This is slow; but robust. - for ss in tqdm(range(N_stars)): - self.fit_velocity_for_star(ss, bootstrap=bootstrap, fixed_t0=fixed_t0, - mask_val=mask_val, mask_lists=mask_lists, weighting=weighting) - + if show_progress: + for ss in tqdm(range(N_stars)): + self.fit_velocity_for_star(ss, weighting=weighting, use_scipy=use_scipy, absolute_sigma=absolute_sigma, bootstrap=bootstrap, fixed_t0=fixed_t0, + mask_val=mask_val, mask_lists=mask_lists) + else: + for ss in range(N_stars): + self.fit_velocity_for_star(ss, weighting=weighting, use_scipy=use_scipy, absolute_sigma=absolute_sigma, bootstrap=bootstrap, fixed_t0=fixed_t0, + mask_val=mask_val, mask_lists=mask_lists, ) if verbose: stop_time = time.time() print('startable.fit_velocities runtime = {0:.0f} s for {1:d} stars'.format(stop_time - start_time, N_stars)) return - def fit_velocity_for_star(self, ss, bootstrap=False, fixed_t0=False, - mask_val=None, mask_lists=False, weighting='var'): - def poly_model(time, *params): - pos = np.polynomial.polynomial.polyval(time, params) - return pos + def fit_velocity_for_star(self, ss, weighting='var', use_scipy=True, absolute_sigma=True, bootstrap=False, fixed_t0=False, + mask_val=None, mask_lists=False): # Make a mask of invalid (NaN) values and a user-specified invalid value. x = np.ma.masked_invalid(self['x'][ss, :].data) @@ -723,9 +755,9 @@ def poly_model(time, *params): xe = xe[good] ye = ye[good] - # np.polynomial ordering - p0x = np.array([x.mean(), 0.0]) - p0y = np.array([y.mean(), 0.0]) + # slope, intercept + p0x = np.array([0., x.mean()]) + p0y = np.array([0., y.mean()]) # Unless t0 is fixed, calculate the t0 for the stars. if fixed_t0 is False: @@ -769,77 +801,113 @@ def poly_model(time, *params): sigma_x = np.abs(xe)**0.5 sigma_y = np.abs(ye)**0.5 - vx_opt, vx_cov = curve_fit(poly_model, dt, x, p0=p0x, sigma=sigma_x, - absolute_sigma=True) - vy_opt, vy_cov = curve_fit(poly_model, dt, y, p0=p0y, sigma=sigma_y, - absolute_sigma=True) + if use_scipy: + vx_opt, vx_cov = curve_fit(linear, dt, x, p0=p0x, sigma=sigma_x, absolute_sigma=absolute_sigma) + vy_opt, vy_cov = curve_fit(linear, dt, y, p0=p0y, sigma=sigma_y, absolute_sigma=absolute_sigma) + vx = vx_opt[0] + x0 = vx_opt[1] + vy = vy_opt[0] + y0 = vy_opt[1] + chi2_vx = calc_chi2(dt, x, sigma_x, *vx_opt) + chi2_vy = calc_chi2(dt, y, sigma_y, *vy_opt) + + else: + result_vx = linear_fit(dt, x, sigma_x, absolute_sigma=absolute_sigma) + result_vy = linear_fit(dt, y, sigma_y, absolute_sigma=absolute_sigma) + vx = result_vx['slope'] + x0 = result_vx['intercept'] + vy = result_vy['slope'] + y0 = result_vy['intercept'] + chi2_vx = result_vx['chi2'] + chi2_vy = result_vy['chi2'] + + self['vx'][ss] = vx + self['x0'][ss] = x0 + self['vy'][ss] = vy + self['y0'][ss] = y0 + self['chi2_vx'][ss] = chi2_vx + self['chi2_vy'][ss] = chi2_vy - self['x0'][ss] = vx_opt[0] - self['vx'][ss] = vx_opt[1] - self['y0'][ss] = vy_opt[0] - self['vy'][ss] = vy_opt[1] - # Run the bootstrap if bootstrap > 0: edx = np.arange(N_good, dtype=int) - fit_x0_b = np.zeros(bootstrap, dtype=float) - fit_vx_b = np.zeros(bootstrap, dtype=float) - fit_y0_b = np.zeros(bootstrap, dtype=float) - fit_vy_b = np.zeros(bootstrap, dtype=float) + vx_b = np.zeros(bootstrap, dtype=float) + x0_b = np.zeros(bootstrap, dtype=float) + vy_b = np.zeros(bootstrap, dtype=float) + y0_b = np.zeros(bootstrap, dtype=float) for bb in range(bootstrap): bdx = np.random.choice(edx, N_good) if weighting == 'var': - sigma_x = xe[bdx] - sigma_y = ye[bdx] + sigma_x_b = xe[bdx] + sigma_y_b = ye[bdx] elif weighting == 'std': - sigma_x = xe[bdx]**0.5 - sigma_y = ye[bdx]**0.5 + sigma_x_b = xe[bdx]**0.5 + sigma_y_b = ye[bdx]**0.5 - vx_opt_b, vx_cov_b = curve_fit(poly_model, dt[bdx], x[bdx], p0=vx_opt, sigma=sigma_x, - absolute_sigma=True) - vy_opt_b, vy_cov_b = curve_fit(poly_model, dt[bdx], y[bdx], p0=vy_opt, sigma=sigma_y, - absolute_sigma=True) - - fit_x0_b[bb] = vx_opt_b[0] - fit_vx_b[bb] = vx_opt_b[1] - fit_y0_b[bb] = vy_opt_b[0] - fit_vy_b[bb] = vy_opt_b[1] - + if use_scipy: + vx_opt_b, vx_cov_b = curve_fit(linear, dt[bdx], x[bdx], p0=vx_opt, sigma=sigma_x_b, + absolute_sigma=True) + vy_opt_b, vy_cov_b = curve_fit(linear, dt[bdx], y[bdx], p0=vy_opt, sigma=sigma_y_b, + absolute_sigma=True) + vx_b[bb] = vx_opt_b[0] + x0_b[bb] = vx_opt_b[1] + vy_b[bb] = vy_opt_b[0] + y0_b[bb] = vy_opt_b[1] + + else: + result_vx_b = linear_fit(dt[bdx], x[bdx], sigma=sigma_x_b) + result_vy_b = linear_fit(dt[bdx], y[bdx], sigma=sigma_y_b) + vx_b[bb] = result_vx_b['slope'] + x0_b[bb] = result_vx_b['intercept'] + vy_b[bb] = result_vy_b['slope'] + y0_b[bb] = result_vy_b['intercept'] + # Save the errors from the bootstrap - self['x0e'][ss] = fit_x0_b.std() - self['vxe'][ss] = fit_vx_b.std() - self['y0e'][ss] = fit_y0_b.std() - self['vye'][ss] = fit_vy_b.std() + self['vxe'][ss] = vx_b.std() + self['x0e'][ss] = x0_b.std() + self['vye'][ss] = vy_b.std() + self['y0e'][ss] = y0_b.std() + else: - vx_err = np.sqrt(vx_cov.diagonal()) - vy_err = np.sqrt(vy_cov.diagonal()) - - self['x0e'][ss] = vx_err[0] - self['vxe'][ss] = vx_err[1] - self['y0e'][ss] = vy_err[0] - self['vye'][ss] = vy_err[1] + if use_scipy: + vxe, x0e = np.sqrt(vx_cov.diagonal()) + vye, y0e = np.sqrt(vy_cov.diagonal()) + else: + vxe = result_vx['e_slope'] + x0e = result_vx['e_intercept'] + vye = result_vy['e_slope'] + y0e = result_vy['e_intercept'] + + self['vxe'][ss] = vxe + self['x0e'][ss] = x0e + self['vye'][ss] = vye + self['y0e'][ss] = y0e elif N_good == 2: - # Not enough epochs to fit a velocity. - if weighting == 'var': - self['x0'][ss] = np.average(x, weights=1.0/xe**2) - self['y0'][ss] = np.average(y, weights=1.0/ye**2) - elif weighting == 'std': - self['x0'][ss] = np.average(x, weights=1.0/np.abs(xe)) - self['y0'][ss] = np.average(y, weights=1.0/np.abs(ye)) - + # Not enough epochs to fit a velocity. dx = np.diff(x)[0] dy = np.diff(y)[0] dt_diff = np.diff(dt)[0] + if weighting == 'var': + sigma_x = 1./xe**2 + sigma_y = 1./ye**2 + elif weighting == 'std': + sigma_x = 1./np.abs(xe) + sigma_y = 1./np.abs(ye) + + self['x0'][ss] = np.average(x, weights=sigma_x) + self['y0'][ss] = np.average(y, weights=sigma_y) self['x0e'][ss] = np.abs(dx) / 2**0.5 self['y0e'][ss] = np.abs(dy) / 2**0.5 self['vx'][ss] = dx / dt_diff self['vy'][ss] = dy / dt_diff self['vxe'][ss] = 0.0 self['vye'][ss] = 0.0 + self['chi2_vx'][ss] = calc_chi2(dt, x, sigma_x, self['vx'][ss], self['x0'][ss]) + self['chi2_vy'][ss] = calc_chi2(dt, y, sigma_y, self['vy'][ss], self['y0'][ss]) else: # N_good == 1 case @@ -851,11 +919,4 @@ def poly_model(time, *params): self['x0e'] = xe self['y0e'] = ye - return - - - - - - - + return \ No newline at end of file From 6df4b97357df2fd226cdc769a7af40587b721b66 Mon Sep 17 00:00:00 2001 From: Lingfeng Wei Date: Mon, 13 May 2024 10:19:02 -0700 Subject: [PATCH 2/4] Added fit velocity for stars detected in all given epochs --- flystar/fit_velocity.py | 119 +++++++++++++++++++++++++++------------- flystar/startables.py | 98 +++++++++++++++++++++++++++++++-- 2 files changed, 176 insertions(+), 41 deletions(-) diff --git a/flystar/fit_velocity.py b/flystar/fit_velocity.py index 0c83b99..45d55d0 100755 --- a/flystar/fit_velocity.py +++ b/flystar/fit_velocity.py @@ -1,4 +1,4 @@ -import tqdm +from tqdm import tqdm import numpy as np import pandas as pd @@ -64,8 +64,9 @@ def calc_chi2(x, y, sigma, slope, intercept): return residual.T @ W @ residual -def fit_velocity(startable, weighting='var', absolute_sigma=True, epoch_cols=None): +def fit_velocity(startable, weighting='var', use_scipy=False, absolute_sigma=True, epoch_cols='all', art_star=False): """Fit proper motion with weighted linear regression equations (see https://en.wikipedia.org/wiki/Weighted_least_squares#Solution). + Assumes that all data are valid. Parameters ---------- @@ -73,9 +74,13 @@ def fit_velocity(startable, weighting='var', absolute_sigma=True, epoch_cols=Non StarTable object weighting : str, optional Weighting by variance (1/ye**2) or standard deviation (1/ye), by default 'var' - epoch_cols : list, optional - Choose which columns to use. If None, use all columns, by default None - + use_scipy : bool, optional + Use scipy.curve_fit or flystar.fit_velocity.linear_fit, by default False + epoch_cols : str or list of intergers, optional + List of indicies of columns to use. If 'all', use all columns, by default 'all' + art_star : bool, optional + Artificial star catalog or not. If True, use startable['x'][:, epoch_ols, 1] as the location, by default False. + Returns ------- result : pd.DataFrame @@ -92,51 +97,91 @@ def fit_velocity(startable, weighting='var', absolute_sigma=True, epoch_cols=Non epoch_cols = np.arange(len(startable.meta['YEARS'])) # use all cols if not specified N = len(startable) - x0 = np.zeros(N) - x0e = np.zeros(N) - y0 = np.zeros(N) - y0e = np.zeros(N) - vx = np.zeros(N) - vxe = np.zeros(N) vy = np.zeros(N) + vxe = np.zeros(N) vye = np.zeros(N) + x0 = np.zeros(N) + y0 = np.zeros(N) + x0e = np.zeros(N) + y0e = np.zeros(N) + chi2_vx = np.zeros(N) + chi2_vy = np.zeros(N) + t0 = np.zeros(N) time = np.array(startable.meta['YEARS'])[epoch_cols] + if not art_star: + x_arr = startable['x'][:, epoch_cols] + y_arr = startable['y'][:, epoch_cols] + else: + x_arr = startable['x'][:, epoch_cols, 1] + y_arr = startable['y'][:, epoch_cols, 1] + + xe_arr = startable['xe'][:, epoch_cols] + ye_arr = startable['ye'][:, epoch_cols] + + if weighting=='std': + sigma_x_arr = np.abs(xe_arr)**0.5 + sigma_y_arr = np.abs(ye_arr)**0.5 + elif weighting=='var': + sigma_x_arr = xe_arr + sigma_y_arr = ye_arr + + # For each star for i in tqdm(range(len(startable))): - x = startable['x'][i, epoch_cols] - y = startable['y'][i, epoch_cols] - xe = startable['xe'][i, epoch_cols] - ye = startable['ye'][i, epoch_cols] + x = x_arr[i] + y = y_arr[i] + xe = xe_arr[i] + ye = ye_arr[i] + sigma_x = sigma_x_arr[i] + sigma_y = sigma_y_arr[i] t_weight = 1. / np.hypot(xe, ye) - t0 = np.average(time, weights=t_weight) - dt = time - t0 + t0[i] = np.average(time, weights=t_weight) + dt = time - t0[i] - if weighting == 'var': - vx_result = linear_fit(dt, x, xe, absolute_sigma=absolute_sigma) - elif weighting == 'std': - vx_result = linear_fit(dt, x, np.abs(xe)**0.5) - vx[i] = vx_result['slope'] - vxe[i] = vx_result['e_slope'] - x0[i] = vx_result['intercept'] - x0e[i] = vx_result['e_intercept'] + if use_scipy: + p0x = np.array([0., x.mean()]) + p0y = np.array([0., y.mean()]) + + # Use scipy.curve_fit to fit for velocity + vx_opt, vx_cov = curve_fit(linear, dt, x, p0=p0x, sigma=sigma_x, absolute_sigma=absolute_sigma) + vy_opt, vy_cov = curve_fit(linear, dt, y, p0=p0y, sigma=sigma_y, absolute_sigma=absolute_sigma) + + vx[i] = vx_opt[0] + vy[i] = vy_opt[0] + x0[i] = vx_opt[1] + y0[i] = vy_opt[1] + vxe[i], x0e[i] = np.sqrt(vx_cov.diagonal()) + vye[i], y0e[i] = np.sqrt(vy_cov.diagonal()) + chi2_vx[i] = calc_chi2(dt, x, sigma_x, *vx_opt) + chi2_vy[i] = calc_chi2(dt, y, sigma_y, *vy_opt) - if weighting == 'var': - vy_result = linear_fit(dt, y, ye) - elif weighting == 'std': - vy_result = linear_fit(dt, y, np.abs(ye)**0.5, absolute_sigma=absolute_sigma) - vy[i] = vy_result['slope'] - vye[i] = vy_result['e_slope'] - y0[i] = vy_result['intercept'] - y0e[i] = vy_result['e_intercept'] + else: + vx_result = linear_fit(dt, x, sigma=sigma_x, absolute_sigma=absolute_sigma) + vy_result = linear_fit(dt, y, sigma=sigma_y, absolute_sigma=absolute_sigma) + + vx[i] = vx_result['slope'] + vxe[i] = vx_result['e_slope'] + x0[i] = vx_result['intercept'] + x0e[i] = vx_result['e_intercept'] + chi2_vx[i] = vx_result['chi2'] + + vy[i] = vy_result['slope'] + vye[i] = vy_result['e_slope'] + y0[i] = vy_result['intercept'] + y0e[i] = vy_result['e_intercept'] + chi2_vy[i] = vy_result['chi2'] result = pd.DataFrame({ - 'vx': vx, 'vxe': vxe, - 'vy': vy, 'vye': vye, - 'x0': x0, 'x0e': x0e, - 'y0': y0, 'y0e': y0e + 'vx': vx, 'vy': vy, + 'vxe': vxe, 'vye': vye, + 'x0': x0, 'y0': y0, + 'x0e': x0e, 'y0e': y0e, + 'chi2_vx': chi2_vx, + 'chi2_vy': chi2_vy, + 't0': t0 }) return result diff --git a/flystar/startables.py b/flystar/startables.py index 839a1d6..4a7716b 100644 --- a/flystar/startables.py +++ b/flystar/startables.py @@ -1,7 +1,7 @@ -from astropy.table import Table, Column +from astropy.table import Table, Column, hstack from astropy.stats import sigma_clipping from scipy.optimize import curve_fit -from flystar.fit_velocity import linear_fit, calc_chi2, linear +from flystar.fit_velocity import linear_fit, calc_chi2, linear, fit_velocity from tqdm import tqdm import numpy as np import warnings @@ -566,7 +566,7 @@ def fit_velocities(self, weighting='var', use_scipy=True, absolute_sigma=True, b if ('t' not in self.colnames) and ('list_times' not in self.meta): raise KeyError("fit_velocities: Failed to time values. No 't' column in table, no 'list_times' in meta.") - N_stars, N_epochs = self['x'].shape + N_stars = len(self) if verbose: start_time = time.time() @@ -582,6 +582,8 @@ def fit_velocities(self, weighting='var', use_scipy=True, absolute_sigma=True, b if 'vxe' in self.colnames: self.remove_column('vxe') if 'y0e' in self.colnames: self.remove_column('y0e') if 'vye' in self.colnames: self.remove_column('vye') + if 'chi2_vx' in self.colnames: self.remove_column('chi2_vx') + if 'chi2_vy' in self.colnames: self.remove_column('chi2_vy') if 't0' in self.colnames: self.remove_column('t0') if 'n_vfit' in self.colnames: self.remove_column('n_vfit') @@ -919,4 +921,92 @@ def fit_velocity_for_star(self, ss, weighting='var', use_scipy=True, absolute_si self['x0e'] = xe self['y0e'] = ye - return \ No newline at end of file + return + + + def fit_velocities_all_epochs(self, weighting='var', use_scipy=False, absolute_sigma=False, epoch_cols='all', mask_val=None, art_star=False, return_result=False): + """Fit velocities for stars detected in all epochs specified by epoch_cols. + Criterion: xe/ye error > 0 and finite, x/y not masked. + + Parameters + ---------- + weighting : str, optional + Variance weighting('var') or standard deviation weighting ('std'), by default 'var' + use_scipy : bool, optional + Use scipy.curve_fit or flystar.fit_velocity.fit_velocity, by default False + absolute_sigma : bool, optional + Absolute sigma or rescaled sigma, by default False + epoch_cols : str or list of intergers, optional + List of epoch column indices used for fitting velocity, by default 'all' + mask_val : float, optional + Values in x, y to be masked + art_star : bool, optional + Artificial star or observation star catalog. If artificial star, use 'det' column to select stars detected in all epochs, by default False + return_result : bool, optional + Return the velocity results or not, by default False + + Returns + ------- + vel_result : astropy Table + Astropy Table with velocity results + """ + + N_stars = len(self) + + if epoch_cols == 'all': + epoch_cols = np.arange(np.shape(self['x'])[1]) + + # Artificial Star + if art_star: + detected_in_all_epochs = np.all(self['det'][:, epoch_cols], axis=1) + + # Observation Star + else: + valid_xe = np.all(self['xe'][:, epoch_cols]!=0, axis=1) & np.all(np.isfinite(self['xe'][:, epoch_cols]), axis=1) + valid_ye = np.all(self['ye'][:, epoch_cols]!=0, axis=1) & np.all(np.isfinite(self['ye'][:, epoch_cols]), axis=1) + + if mask_val: + x = np.ma.masked_values(self['x'][:, epoch_cols], mask_val) + y = np.ma.masked_values(self['y'][:, epoch_cols], mask_val) + + # If no mask, convert x.mask to list + if not np.ma.is_masked(x): + x.mask = np.zeros_like(self['x'][:, epoch_cols].data, dtype=bool) + if not np.ma.is_masked(y): + y.mask = np.zeros_like(self['y'][:, epoch_cols].data, dtype=bool) + + valid_x = ~np.any(x.mask, axis=1) + valid_y = ~np.any(y.mask, axis=1) + detected_in_all_epochs = np.logical_and.reduce(( + valid_x, valid_y, valid_xe, valid_ye + )) + else: + detected_in_all_epochs = np.logical_and(valid_xe, valid_ye) + + + # Fit velocities + vel_result = fit_velocity(self[detected_in_all_epochs], weighting=weighting, use_scipy=use_scipy, absolute_sigma=absolute_sigma, epoch_cols=epoch_cols, art_star=art_star) + vel_result = Table.from_pandas(vel_result) + + + # Add n_vfit + n_vfit = len(epoch_cols) + vel_result['n_vfit'] = n_vfit + + # Clean/remove up old arrays. + columns = [*vel_result.keys(), 'n_vfit'] + for column in columns: + if column in self.colnames: self.remove_column(column) + + # Update self + for column in columns: + column_array = np.ma.zeros(N_stars) + column_array[detected_in_all_epochs] = vel_result[column] + column_array[~detected_in_all_epochs] = np.nan + column_array.mask = ~detected_in_all_epochs + self[column] = column_array + + if return_result: + return vel_result + else: + return \ No newline at end of file From 9393773dd83dddf2682983652f6502cb59d17e91 Mon Sep 17 00:00:00 2001 From: Lingfeng Wei Date: Mon, 17 Jun 2024 15:29:12 -0700 Subject: [PATCH 3/4] Rename function --- flystar/startables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flystar/startables.py b/flystar/startables.py index 4a7716b..da0d03b 100644 --- a/flystar/startables.py +++ b/flystar/startables.py @@ -924,7 +924,7 @@ def fit_velocity_for_star(self, ss, weighting='var', use_scipy=True, absolute_si return - def fit_velocities_all_epochs(self, weighting='var', use_scipy=False, absolute_sigma=False, epoch_cols='all', mask_val=None, art_star=False, return_result=False): + def fit_velocities_all_detected(self, weighting='var', use_scipy=False, absolute_sigma=False, epoch_cols='all', mask_val=None, art_star=False, return_result=False): """Fit velocities for stars detected in all epochs specified by epoch_cols. Criterion: xe/ye error > 0 and finite, x/y not masked. From 7aefaba34ec260f1303dff86c0fdae24366af405 Mon Sep 17 00:00:00 2001 From: Lingfeng Wei Date: Mon, 17 Jun 2024 15:35:41 -0700 Subject: [PATCH 4/4] Fix missing absolute_sigma control --- flystar/startables.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flystar/startables.py b/flystar/startables.py index da0d03b..7e2c0a1 100644 --- a/flystar/startables.py +++ b/flystar/startables.py @@ -850,17 +850,17 @@ def fit_velocity_for_star(self, ss, weighting='var', use_scipy=True, absolute_si if use_scipy: vx_opt_b, vx_cov_b = curve_fit(linear, dt[bdx], x[bdx], p0=vx_opt, sigma=sigma_x_b, - absolute_sigma=True) + absolute_sigma=absolute_sigma) vy_opt_b, vy_cov_b = curve_fit(linear, dt[bdx], y[bdx], p0=vy_opt, sigma=sigma_y_b, - absolute_sigma=True) + absolute_sigma=absolute_sigma) vx_b[bb] = vx_opt_b[0] x0_b[bb] = vx_opt_b[1] vy_b[bb] = vy_opt_b[0] y0_b[bb] = vy_opt_b[1] else: - result_vx_b = linear_fit(dt[bdx], x[bdx], sigma=sigma_x_b) - result_vy_b = linear_fit(dt[bdx], y[bdx], sigma=sigma_y_b) + result_vx_b = linear_fit(dt[bdx], x[bdx], sigma=sigma_x_b, absolute_sigma=absolute_sigma) + result_vy_b = linear_fit(dt[bdx], y[bdx], sigma=sigma_y_b, absolute_sigma=absolute_sigma) vx_b[bb] = result_vx_b['slope'] x0_b[bb] = result_vx_b['intercept'] vy_b[bb] = result_vy_b['slope']