From 010e6b1148d70b560fbba3f64106b3f2982b99fb Mon Sep 17 00:00:00 2001
From: Macy Huston <mhuston@laser.astro.berkeley.edu>
Date: Fri, 24 Oct 2025 08:08:38 -0700
Subject: [PATCH 1/4] decrease memory useage in bootstrap

---
 flystar/align.py | 82 +++++++++++++++++++++++++++++++-----------------
 1 file changed, 53 insertions(+), 29 deletions(-)

diff --git a/flystar/align.py b/flystar/align.py
index 7b00ba0..03f557b 100755
--- a/flystar/align.py
+++ b/flystar/align.py
@@ -1127,15 +1127,15 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
         else:
             idx_good = np.arange(0, len(ref_table), 1)
         idx_ref = np.where(ref_table['use_in_trans'] == True)
-
-        # Initialize output arrays
-        x_trans_arr = np.ones((len(ref_table['x']), n_boot, n_epochs)) * -999
-        y_trans_arr = np.ones((len(ref_table['x']), n_boot, n_epochs)) * -999
-        m_trans_arr = np.ones((len(ref_table['x']), n_boot, n_epochs)) * -999
-        xe_trans_arr = np.ones((len(ref_table['x']), n_boot, n_epochs)) * -999
-        ye_trans_arr = np.ones((len(ref_table['x']), n_boot, n_epochs)) * -999
-        me_trans_arr = np.ones((len(ref_table['x']), n_boot, n_epochs)) * -999
-
+        
+        # Initialize sums for output
+        x_boot_sum = np.zeros((len(ref_table['x']), n_epochs))
+        x2_boot_sum = np.zeros((len(ref_table['x']), n_epochs))
+        y_boot_sum = np.zeros((len(ref_table['x']), n_epochs))
+        y2_boot_sum = np.zeros((len(ref_table['x']), n_epochs))
+        m_boot_sum = np.zeros((len(ref_table['x']), n_epochs))
+        m2_boot_sum = np.zeros((len(ref_table['x']), n_epochs))
+        
         # Set up motion model parameters
         motion_model_list = ['Fixed', self.default_motion_model]
         if 'motion_model_used' in ref_table.keys():
@@ -1144,9 +1144,11 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
             motion_model_list += ref_table['motion_model_input'].tolist()
         motion_col_list = motion_model.get_list_motion_model_param_names(np.unique(motion_model_list).tolist(), with_errors=False, with_fixed=False)
         if calc_vel_in_bootstrap:
-            motion_data = {}
+            motion_boot_sum = {}
+            motion2_boot_sum = {}
             for col in motion_col_list:
-                motion_data[col] = np.ones((len(ref_table['x']), n_boot)) * -999
+                motion_boot_sum[col] = np.zeros((len(ref_table['x'])))
+                motion2_boot_sum[col] = np.zeros((len(ref_table['x'])))
 
         ### IF MEMORY PROBLEMS HERE:
         ### DEFINE MEAN, STD VARIABLES AND BUILD THEM RATHER THAN SAVING FULL ARRAY
@@ -1158,6 +1160,15 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
             # reference stars. Use a loop for each epoch here, so we
             # can handle case where different reference stars are used
             # in different epochs
+            
+            # Initialize data arrays
+            x_trans_arr = np.ones((len(ref_table['x']), n_epochs)) * -999
+            y_trans_arr = np.ones((len(ref_table['x']), n_epochs)) * -999
+            m_trans_arr = np.ones((len(ref_table['x']), n_epochs)) * -999
+            xe_trans_arr = np.ones((len(ref_table['x']), n_epochs)) * -999
+            ye_trans_arr = np.ones((len(ref_table['x']), n_epochs)) * -999
+            me_trans_arr = np.ones((len(ref_table['x']), n_epochs)) * -999
+            
             for jj in range(n_epochs):
                 # Extract bootstrap sample of matched reference stars
                 good = np.where(~np.isnan(ref_table['x_orig'][idx_ref][:,jj]))
@@ -1236,13 +1247,20 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
                     starlist_T.transform_xy(trans)
                     
                 # Add output to pos arrays
-                x_trans_arr[:,ii,jj] = starlist_T['x']
-                y_trans_arr[:,ii,jj] = starlist_T['y']
-                m_trans_arr[:,ii,jj] = starlist_T['m']
-                xe_trans_arr[:,ii,jj] = starlist_T['xe']
-                ye_trans_arr[:,ii,jj] = starlist_T['ye']
-                me_trans_arr[:,ii,jj] = starlist_T['me']
- 
+                x_trans_arr[:,jj] = starlist_T['x']
+                y_trans_arr[:,jj] = starlist_T['y']
+                m_trans_arr[:,jj] = starlist_T['m']
+                xe_trans_arr[:,jj] = starlist_T['xe']
+                ye_trans_arr[:,jj] = starlist_T['ye']
+                me_trans_arr[:,jj] = starlist_T['me']
+                
+            x_boot_sum += x_trans_arr
+            x2_boot_sum += x_trans_arr**2
+            y_boot_sum += y_trans_arr
+            y2_boot_sum += y_trans_arr**2
+            m_boot_sum += m_trans_arr
+            m2_boot_sum += m_trans_arr**2
+            
             t2 = time.time()
             #print('=================================================')
             #print('Time to do {0} epochs: {1}s'.format(n_epochs,  t2-t1))
@@ -1258,12 +1276,12 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
                 t_boot = t_arr[boot_idx]
             
                 star_table = StarTable(name=ref_table['name'],
-                                        x=x_trans_arr[:,ii,boot_idx],
-                                        y=y_trans_arr[:,ii,boot_idx],
-                                        m=m_trans_arr[:,ii,boot_idx],
-                                        xe=xe_trans_arr[:,ii,boot_idx],
-                                        ye=ye_trans_arr[:,ii,boot_idx],
-                                        me=me_trans_arr[:,ii,boot_idx],
+                                        x=x_trans_arr[:,boot_idx],
+                                        y=y_trans_arr[:,boot_idx],
+                                        m=m_trans_arr[:,boot_idx],
+                                        xe=xe_trans_arr[:,boot_idx],
+                                        ye=ye_trans_arr[:,boot_idx],
+                                        me=me_trans_arr[:,boot_idx],
                                         t=np.tile(t_boot, (len(ref_table),1)))
 
                 # Now, do proper motion calculation, making sure to fix t0 to the
@@ -1278,7 +1296,8 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
 
                 # Save proper motion fit results to output arrays
                 for col in motion_col_list:
-                    motion_data[col][:,ii] = star_table[col]
+                    motion_boot_sum[col] += star_table[col]
+                    motion2_boot_sum[col] += star_table[col]**2
 
                 # Quick check to make sure bootstrap calc was valid: output t0 should be
                 # same as input t0_arr, since we used fixed_t0 option
@@ -1290,15 +1309,20 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
                 #print('=================================================')
 
         # Calculate the bootstrap error values.
-        x_err_b = np.std(x_trans_arr, ddof=1, axis=1)
-        y_err_b = np.std(y_trans_arr, ddof=1, axis=1)
-        m_err_b = np.std(m_trans_arr, ddof=1, axis=1)
+        x_boot_mean = x_boot_sum/n_boot
+        x_err_b = np.sqrt((x2_boot_sum - 2*x_boot_mean*x_boot_sum + n_boot*x_boot_mean**2)/n_boot)
+        y_boot_mean = y_boot_sum/n_boot
+        y_err_b = np.sqrt((y2_boot_sum - 2*y_boot_mean*y_boot_sum + n_boot*y_boot_mean**2)/n_boot)
+        m_boot_mean = m_boot_sum/n_boot
+        m_err_b = np.sqrt((m2_boot_sum - 2*m_boot_mean*m_boot_sum + n_boot*m_boot_mean**2)/n_boot)
         #pdb.set_trace()
 
         motion_data_err = {}
         if calc_vel_in_bootstrap:
             for col in motion_col_list:
-                motion_data_err[col] = np.nanstd(motion_data[col], ddof=1,axis=1)
+                mot_boot_mean = motion_boot_sum[col]/n_boot
+                motion_data_err[col] = np.sqrt((motion2_boot_sum[col] -
+                    2*mot_boot_mean*motion_boot_sum[col] + n_boot*mot_boot_mean**2)/n_boot)
         else:
             for col in motion_col_list:
                 motion_data_err[col] = np.nan

From 6e50b6c247b44661d65a4a99c3f27af4d117d046 Mon Sep 17 00:00:00 2001
From: Macy Huston <mhuston@laser.astro.berkeley.edu>
Date: Fri, 24 Oct 2025 09:15:15 -0700
Subject: [PATCH 2/4] debug bootstrap vel

---
 flystar/align.py            | 11 ++++++++---
 flystar/tests/test_align.py |  1 +
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/flystar/align.py b/flystar/align.py
index 03f557b..f394879 100755
--- a/flystar/align.py
+++ b/flystar/align.py
@@ -1149,6 +1149,8 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
             for col in motion_col_list:
                 motion_boot_sum[col] = np.zeros((len(ref_table['x'])))
                 motion2_boot_sum[col] = np.zeros((len(ref_table['x'])))
+        motion_boot_min_epochs = np.max([self.motion_model_dict[mod].n_pts_req
+                                    for mod in np.unique(motion_model_list)])
 
         ### IF MEMORY PROBLEMS HERE:
         ### DEFINE MEAN, STD VARIABLES AND BUILD THEM RATHER THAN SAVING FULL ARRAY
@@ -1270,9 +1272,10 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
             # for each star, if desired. Draw a full-sample bootstrap over the epochs
             # for each star, and then run it through the startable fit_velocities machinery
             if calc_vel_in_bootstrap:
-                # TODO: consider confirming we reach some threshold of unique time values here?
-                # TODO: Like, grab n_pts needed for the default motion model maybe
                 boot_idx = np.random.choice(np.arange(0, n_epochs, 1), size=n_epochs)
+                while len(np.unique(boot_idx)) < motion_boot_min_epochs:
+                    boot_idx = np.random.choice(np.arange(0, n_epochs, 1), size=n_epochs)
+                print(boot_idx)
                 t_boot = t_arr[boot_idx]
             
                 star_table = StarTable(name=ref_table['name'],
@@ -1298,6 +1301,8 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
                 for col in motion_col_list:
                     motion_boot_sum[col] += star_table[col]
                     motion2_boot_sum[col] += star_table[col]**2
+                print(t_boot)
+                print(star_table[['vx','x0']])
 
                 # Quick check to make sure bootstrap calc was valid: output t0 should be
                 # same as input t0_arr, since we used fixed_t0 option
@@ -1315,7 +1320,7 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
         y_err_b = np.sqrt((y2_boot_sum - 2*y_boot_mean*y_boot_sum + n_boot*y_boot_mean**2)/n_boot)
         m_boot_mean = m_boot_sum/n_boot
         m_err_b = np.sqrt((m2_boot_sum - 2*m_boot_mean*m_boot_sum + n_boot*m_boot_mean**2)/n_boot)
-        #pdb.set_trace()
+        pdb.set_trace()
 
         motion_data_err = {}
         if calc_vel_in_bootstrap:
diff --git a/flystar/tests/test_align.py b/flystar/tests/test_align.py
index 6a63a58..9b65eb6 100644
--- a/flystar/tests/test_align.py
+++ b/flystar/tests/test_align.py
@@ -1141,6 +1141,7 @@ def test_bootstrap():
     assert np.sum(np.isnan(match1.ref_table['ye_boot'])) == 0
     assert np.sum(np.isnan(match1.ref_table['vx_err_boot'])) == 0
     assert np.sum(np.isnan(match1.ref_table['vy_err_boot'])) == 0
+    #pdb.set_trace()
 
     # Test 2: make sure boot_epochs_min is working
     # Eliminate some rows to list2, so some stars are only in 1 epoch.

From 8d68cbf5a128594e86bd0b485bfec8637575a10f Mon Sep 17 00:00:00 2001
From: Macy Huston <mhuston@laser.astro.berkeley.edu>
Date: Fri, 24 Oct 2025 09:26:02 -0700
Subject: [PATCH 3/4] debug bootstrap vel

---
 flystar/align.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/flystar/align.py b/flystar/align.py
index f394879..87995e1 100755
--- a/flystar/align.py
+++ b/flystar/align.py
@@ -1260,8 +1260,9 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
             x2_boot_sum += x_trans_arr**2
             y_boot_sum += y_trans_arr
             y2_boot_sum += y_trans_arr**2
-            m_boot_sum += m_trans_arr
-            m2_boot_sum += m_trans_arr**2
+            if self.mag_trans:
+                m_boot_sum += m_trans_arr
+                m2_boot_sum += m_trans_arr**2
             
             t2 = time.time()
             #print('=================================================')

From b28f8a0e6dbe3b4e2292a159e9aa6ff661c877c0 Mon Sep 17 00:00:00 2001
From: Macy Huston <mhuston@laser.astro.berkeley.edu>
Date: Fri, 24 Oct 2025 09:44:01 -0700
Subject: [PATCH 4/4] finalize bootstrap changes

---
 .gitignore       | 3 +++
 flystar/align.py | 4 ----
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index b56c81e..b291312 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+*.png
+*random*.fits
+*.DS_Store
 *.pyc
 .ipynb_checkpoints/
 
diff --git a/flystar/align.py b/flystar/align.py
index 87995e1..f5f0742 100755
--- a/flystar/align.py
+++ b/flystar/align.py
@@ -1276,7 +1276,6 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
                 boot_idx = np.random.choice(np.arange(0, n_epochs, 1), size=n_epochs)
                 while len(np.unique(boot_idx)) < motion_boot_min_epochs:
                     boot_idx = np.random.choice(np.arange(0, n_epochs, 1), size=n_epochs)
-                print(boot_idx)
                 t_boot = t_arr[boot_idx]
             
                 star_table = StarTable(name=ref_table['name'],
@@ -1302,8 +1301,6 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
                 for col in motion_col_list:
                     motion_boot_sum[col] += star_table[col]
                     motion2_boot_sum[col] += star_table[col]**2
-                print(t_boot)
-                print(star_table[['vx','x0']])
 
                 # Quick check to make sure bootstrap calc was valid: output t0 should be
                 # same as input t0_arr, since we used fixed_t0 option
@@ -1321,7 +1318,6 @@ def calc_bootstrap_errors(self, n_boot=100, boot_epochs_min=-1, calc_vel_in_boot
         y_err_b = np.sqrt((y2_boot_sum - 2*y_boot_mean*y_boot_sum + n_boot*y_boot_mean**2)/n_boot)
         m_boot_mean = m_boot_sum/n_boot
         m_err_b = np.sqrt((m2_boot_sum - 2*m_boot_mean*m_boot_sum + n_boot*m_boot_mean**2)/n_boot)
-        pdb.set_trace()
 
         motion_data_err = {}
         if calc_vel_in_bootstrap: