From 010e6b1148d70b560fbba3f64106b3f2982b99fb Mon Sep 17 00:00:00 2001 From: Macy Huston Date: Fri, 24 Oct 2025 08:08:38 -0700 Subject: [PATCH 1/4] decrease memory useage in bootstrap --- flystar/align.py | 82 +++++++++++++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 29 deletions(-) diff --git a/flystar/align.py b/flystar/align.py index 7b00ba0..03f557b 100755 --- a/flystar/align.py +++ b/flystar/align.py @@ -1127,15 +1127,15 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot else: idx_good = np.arange(0, len(ref_table), 1) idx_ref = np.where(ref_table['use_in_trans'] == True) - - # Initialize output arrays - x_trans_arr = np.ones((len(ref_table['x']), n_boot, n_epochs)) * -999 - y_trans_arr = np.ones((len(ref_table['x']), n_boot, n_epochs)) * -999 - m_trans_arr = np.ones((len(ref_table['x']), n_boot, n_epochs)) * -999 - xe_trans_arr = np.ones((len(ref_table['x']), n_boot, n_epochs)) * -999 - ye_trans_arr = np.ones((len(ref_table['x']), n_boot, n_epochs)) * -999 - me_trans_arr = np.ones((len(ref_table['x']), n_boot, n_epochs)) * -999 - + + # Initialize sums for output + x_boot_sum = np.zeros((len(ref_table['x']), n_epochs)) + x2_boot_sum = np.zeros((len(ref_table['x']), n_epochs)) + y_boot_sum = np.zeros((len(ref_table['x']), n_epochs)) + y2_boot_sum = np.zeros((len(ref_table['x']), n_epochs)) + m_boot_sum = np.zeros((len(ref_table['x']), n_epochs)) + m2_boot_sum = np.zeros((len(ref_table['x']), n_epochs)) + # Set up motion model parameters motion_model_list = ['Fixed', self.default_motion_model] if 'motion_model_used' in ref_table.keys(): @@ -1144,9 +1144,11 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot motion_model_list += ref_table['motion_model_input'].tolist() motion_col_list = motion_model.get_list_motion_model_param_names(np.unique(motion_model_list).tolist(), with_errors=False, with_fixed=False) if calc_vel_in_bootstrap: - motion_data = {} + motion_boot_sum = {} + motion2_boot_sum = {} for col in motion_col_list: - motion_data[col] = np.ones((len(ref_table['x']), n_boot)) * -999 + motion_boot_sum[col] = np.zeros((len(ref_table['x']))) + motion2_boot_sum[col] = np.zeros((len(ref_table['x']))) ### IF MEMORY PROBLEMS HERE: ### DEFINE MEAN, STD VARIABLES AND BUILD THEM RATHER THAN SAVING FULL ARRAY @@ -1158,6 +1160,15 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot # reference stars. Use a loop for each epoch here, so we # can handle case where different reference stars are used # in different epochs + + # Initialize data arrays + x_trans_arr = np.ones((len(ref_table['x']), n_epochs)) * -999 + y_trans_arr = np.ones((len(ref_table['x']), n_epochs)) * -999 + m_trans_arr = np.ones((len(ref_table['x']), n_epochs)) * -999 + xe_trans_arr = np.ones((len(ref_table['x']), n_epochs)) * -999 + ye_trans_arr = np.ones((len(ref_table['x']), n_epochs)) * -999 + me_trans_arr = np.ones((len(ref_table['x']), n_epochs)) * -999 + for jj in range(n_epochs): # Extract bootstrap sample of matched reference stars good = np.where(~np.isnan(ref_table['x_orig'][idx_ref][:,jj])) @@ -1236,13 +1247,20 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot starlist_T.transform_xy(trans) # Add output to pos arrays - x_trans_arr[:,ii,jj] = starlist_T['x'] - y_trans_arr[:,ii,jj] = starlist_T['y'] - m_trans_arr[:,ii,jj] = starlist_T['m'] - xe_trans_arr[:,ii,jj] = starlist_T['xe'] - ye_trans_arr[:,ii,jj] = starlist_T['ye'] - me_trans_arr[:,ii,jj] = starlist_T['me'] - + x_trans_arr[:,jj] = starlist_T['x'] + y_trans_arr[:,jj] = starlist_T['y'] + m_trans_arr[:,jj] = starlist_T['m'] + xe_trans_arr[:,jj] = starlist_T['xe'] + ye_trans_arr[:,jj] = starlist_T['ye'] + me_trans_arr[:,jj] = starlist_T['me'] + + x_boot_sum += x_trans_arr + x2_boot_sum += x_trans_arr**2 + y_boot_sum += y_trans_arr + y2_boot_sum += y_trans_arr**2 + m_boot_sum += m_trans_arr + m2_boot_sum += m_trans_arr**2 + t2 = time.time() #print('=================================================') #print('Time to do {0} epochs: {1}s'.format(n_epochs, t2-t1)) @@ -1258,12 +1276,12 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot t_boot = t_arr[boot_idx] star_table = StarTable(name=ref_table['name'], - x=x_trans_arr[:,ii,boot_idx], - y=y_trans_arr[:,ii,boot_idx], - m=m_trans_arr[:,ii,boot_idx], - xe=xe_trans_arr[:,ii,boot_idx], - ye=ye_trans_arr[:,ii,boot_idx], - me=me_trans_arr[:,ii,boot_idx], + x=x_trans_arr[:,boot_idx], + y=y_trans_arr[:,boot_idx], + m=m_trans_arr[:,boot_idx], + xe=xe_trans_arr[:,boot_idx], + ye=ye_trans_arr[:,boot_idx], + me=me_trans_arr[:,boot_idx], t=np.tile(t_boot, (len(ref_table),1))) # Now, do proper motion calculation, making sure to fix t0 to the @@ -1278,7 +1296,8 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot # Save proper motion fit results to output arrays for col in motion_col_list: - motion_data[col][:,ii] = star_table[col] + motion_boot_sum[col] += star_table[col] + motion2_boot_sum[col] += star_table[col]**2 # Quick check to make sure bootstrap calc was valid: output t0 should be # same as input t0_arr, since we used fixed_t0 option @@ -1290,15 +1309,20 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot #print('=================================================') # Calculate the bootstrap error values. - x_err_b = np.std(x_trans_arr, ddof=1, axis=1) - y_err_b = np.std(y_trans_arr, ddof=1, axis=1) - m_err_b = np.std(m_trans_arr, ddof=1, axis=1) + x_boot_mean = x_boot_sum/n_boot + x_err_b = np.sqrt((x2_boot_sum - 2*x_boot_mean*x_boot_sum + n_boot*x_boot_mean**2)/n_boot) + y_boot_mean = y_boot_sum/n_boot + y_err_b = np.sqrt((y2_boot_sum - 2*y_boot_mean*y_boot_sum + n_boot*y_boot_mean**2)/n_boot) + m_boot_mean = m_boot_sum/n_boot + m_err_b = np.sqrt((m2_boot_sum - 2*m_boot_mean*m_boot_sum + n_boot*m_boot_mean**2)/n_boot) #pdb.set_trace() motion_data_err = {} if calc_vel_in_bootstrap: for col in motion_col_list: - motion_data_err[col] = np.nanstd(motion_data[col], ddof=1,axis=1) + mot_boot_mean = motion_boot_sum[col]/n_boot + motion_data_err[col] = np.sqrt((motion2_boot_sum[col] - + 2*mot_boot_mean*motion_boot_sum[col] + n_boot*mot_boot_mean**2)/n_boot) else: for col in motion_col_list: motion_data_err[col] = np.nan From 6e50b6c247b44661d65a4a99c3f27af4d117d046 Mon Sep 17 00:00:00 2001 From: Macy Huston Date: Fri, 24 Oct 2025 09:15:15 -0700 Subject: [PATCH 2/4] debug bootstrap vel --- flystar/align.py | 11 ++++++++--- flystar/tests/test_align.py | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/flystar/align.py b/flystar/align.py index 03f557b..f394879 100755 --- a/flystar/align.py +++ b/flystar/align.py @@ -1149,6 +1149,8 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot for col in motion_col_list: motion_boot_sum[col] = np.zeros((len(ref_table['x']))) motion2_boot_sum[col] = np.zeros((len(ref_table['x']))) + motion_boot_min_epochs = np.max([self.motion_model_dict[mod].n_pts_req + for mod in np.unique(motion_model_list)]) ### IF MEMORY PROBLEMS HERE: ### DEFINE MEAN, STD VARIABLES AND BUILD THEM RATHER THAN SAVING FULL ARRAY @@ -1270,9 +1272,10 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot # for each star, if desired. Draw a full-sample bootstrap over the epochs # for each star, and then run it through the startable fit_velocities machinery if calc_vel_in_bootstrap: - # TODO: consider confirming we reach some threshold of unique time values here? - # TODO: Like, grab n_pts needed for the default motion model maybe boot_idx = np.random.choice(np.arange(0, n_epochs, 1), size=n_epochs) + while len(np.unique(boot_idx)) < motion_boot_min_epochs: + boot_idx = np.random.choice(np.arange(0, n_epochs, 1), size=n_epochs) + print(boot_idx) t_boot = t_arr[boot_idx] star_table = StarTable(name=ref_table['name'], @@ -1298,6 +1301,8 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot for col in motion_col_list: motion_boot_sum[col] += star_table[col] motion2_boot_sum[col] += star_table[col]**2 + print(t_boot) + print(star_table[['vx','x0']]) # Quick check to make sure bootstrap calc was valid: output t0 should be # same as input t0_arr, since we used fixed_t0 option @@ -1315,7 +1320,7 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot y_err_b = np.sqrt((y2_boot_sum - 2*y_boot_mean*y_boot_sum + n_boot*y_boot_mean**2)/n_boot) m_boot_mean = m_boot_sum/n_boot m_err_b = np.sqrt((m2_boot_sum - 2*m_boot_mean*m_boot_sum + n_boot*m_boot_mean**2)/n_boot) - #pdb.set_trace() + pdb.set_trace() motion_data_err = {} if calc_vel_in_bootstrap: diff --git a/flystar/tests/test_align.py b/flystar/tests/test_align.py index 6a63a58..9b65eb6 100644 --- a/flystar/tests/test_align.py +++ b/flystar/tests/test_align.py @@ -1141,6 +1141,7 @@ def test_bootstrap(): assert np.sum(np.isnan(match1.ref_table['ye_boot'])) == 0 assert np.sum(np.isnan(match1.ref_table['vx_err_boot'])) == 0 assert np.sum(np.isnan(match1.ref_table['vy_err_boot'])) == 0 + #pdb.set_trace() # Test 2: make sure boot_epochs_min is working # Eliminate some rows to list2, so some stars are only in 1 epoch. From 8d68cbf5a128594e86bd0b485bfec8637575a10f Mon Sep 17 00:00:00 2001 From: Macy Huston Date: Fri, 24 Oct 2025 09:26:02 -0700 Subject: [PATCH 3/4] debug bootstrap vel --- flystar/align.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/flystar/align.py b/flystar/align.py index f394879..87995e1 100755 --- a/flystar/align.py +++ b/flystar/align.py @@ -1260,8 +1260,9 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot x2_boot_sum += x_trans_arr**2 y_boot_sum += y_trans_arr y2_boot_sum += y_trans_arr**2 - m_boot_sum += m_trans_arr - m2_boot_sum += m_trans_arr**2 + if self.mag_trans: + m_boot_sum += m_trans_arr + m2_boot_sum += m_trans_arr**2 t2 = time.time() #print('=================================================') From b28f8a0e6dbe3b4e2292a159e9aa6ff661c877c0 Mon Sep 17 00:00:00 2001 From: Macy Huston Date: Fri, 24 Oct 2025 09:44:01 -0700 Subject: [PATCH 4/4] finalize bootstrap changes --- .gitignore | 3 +++ flystar/align.py | 4 ---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index b56c81e..b291312 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +*.png +*random*.fits +*.DS_Store *.pyc .ipynb_checkpoints/ diff --git a/flystar/align.py b/flystar/align.py index 87995e1..f5f0742 100755 --- a/flystar/align.py +++ b/flystar/align.py @@ -1276,7 +1276,6 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot boot_idx = np.random.choice(np.arange(0, n_epochs, 1), size=n_epochs) while len(np.unique(boot_idx)) < motion_boot_min_epochs: boot_idx = np.random.choice(np.arange(0, n_epochs, 1), size=n_epochs) - print(boot_idx) t_boot = t_arr[boot_idx] star_table = StarTable(name=ref_table['name'], @@ -1302,8 +1301,6 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot for col in motion_col_list: motion_boot_sum[col] += star_table[col] motion2_boot_sum[col] += star_table[col]**2 - print(t_boot) - print(star_table[['vx','x0']]) # Quick check to make sure bootstrap calc was valid: output t0 should be # same as input t0_arr, since we used fixed_t0 option @@ -1321,7 +1318,6 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot y_err_b = np.sqrt((y2_boot_sum - 2*y_boot_mean*y_boot_sum + n_boot*y_boot_mean**2)/n_boot) m_boot_mean = m_boot_sum/n_boot m_err_b = np.sqrt((m2_boot_sum - 2*m_boot_mean*m_boot_sum + n_boot*m_boot_mean**2)/n_boot) - pdb.set_trace() motion_data_err = {} if calc_vel_in_bootstrap: