From b7e144acdfcdf7137c404c96b954f1f49538ccbd Mon Sep 17 00:00:00 2001 From: perib Date: Thu, 31 Oct 2024 20:44:51 -0400 Subject: [PATCH 1/7] "release" futures to free up memory and prevent memory leak --- tpot2/utils/eval_utils.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tpot2/utils/eval_utils.py b/tpot2/utils/eval_utils.py index 7f93cef4..647bffdc 100644 --- a/tpot2/utils/eval_utils.py +++ b/tpot2/utils/eval_utils.py @@ -49,6 +49,7 @@ from dask.distributed import progress import distributed import func_timeout +import gc def process_scores(scores, n): ''' @@ -186,13 +187,15 @@ def parallel_eval_objective_list(individual_list, print("cancelld ", completed_future.cancelled()) scores = [np.nan for _ in range(n_expected_columns)] eval_error = "INVALID" + + completed_future.release() #release the future else: #if future is not done # check if the future has been running for too long, cancel the future # we multiply max_eval_time_mins by 1.25 since the objective function in the future should be able to cancel itself. This is a backup in case it doesn't. if max_eval_time_mins is not None and time.time() - submitted_futures[completed_future]["time"] > max_eval_time_mins*1.25*60: completed_future.cancel() - + completed_future.release() if verbose >= 4: print(f'WARNING AN INDIVIDUAL TIMED OUT (Fallback): \n {submitted_futures[completed_future]} \n') @@ -200,6 +203,7 @@ def parallel_eval_objective_list(individual_list, eval_error = "TIMEOUT" elif global_timeout_triggered: completed_future.cancel() + completed_future.release() if verbose >= 4: print(f'WARNING AN INDIVIDUAL TIMED OUT (max_time_mins): \n {submitted_futures[completed_future]} \n') @@ -222,6 +226,10 @@ def parallel_eval_objective_list(individual_list, #update submitted futures submitted_futures.pop(completed_future) + + #I am not entirely sure if this is necessary. I believe that calling release on the futures should be enough to free up memory. If memory issues persist, this may be a good place to start. + # client.run(gc.collect) #run garbage collection to free up memory + #break if timeout if global_timeout_triggered: while len(individual_stack) > 0: @@ -243,10 +251,10 @@ def parallel_eval_objective_list(individual_list, submitted_inds.add(individual.unique_id()) + #I am not entirely sure if this is necessary. I believe that calling release on the futures should be enough to free up memory. If memory issues persist, this may be a good place to start. + # client.run(gc.collect) #run garbage collection to free up memory #collect remaining futures - - final_scores = [scores_dict[individual]["scores"] for individual in individual_list] final_start_times = [scores_dict[individual]["start_time"] for individual in individual_list] final_end_times = [scores_dict[individual]["end_time"] for individual in individual_list] From b6c1966c9fd91356d38faedf72a1d5774185be73 Mon Sep 17 00:00:00 2001 From: perib Date: Fri, 1 Nov 2024 11:34:09 -0400 Subject: [PATCH 2/7] steady state memory clears --- tpot2/evolvers/steady_state_evolver.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tpot2/evolvers/steady_state_evolver.py b/tpot2/evolvers/steady_state_evolver.py index 9a9f5708..a9270954 100644 --- a/tpot2/evolvers/steady_state_evolver.py +++ b/tpot2/evolvers/steady_state_evolver.py @@ -466,13 +466,14 @@ def optimize(self): print("cancelld ", completed_future.cancelled()) scores = [np.nan for _ in range(len(self.objective_names))] eval_error = "INVALID" + completed_future.release() #release the future else: #if future is not done if self.max_eval_time_mins is not None: #check if the future has been running for too long, cancel the future if time.time() - submitted_futures[completed_future]["time"] > self.max_eval_time_mins*1.25*60: completed_future.cancel() - + completed_future.release() #release the future if self.verbose >= 4: print(f'WARNING AN INDIVIDUAL TIMED OUT (Fallback): \n {submitted_futures[completed_future]} \n') @@ -506,6 +507,8 @@ def optimize(self): self.population.remove_invalid_from_population(column_names="Eval Error", invalid_value="INVALID") self.population.remove_invalid_from_population(column_names="Eval Error", invalid_value="TIMEOUT") + #I am not entirely sure if this is necessary. I believe that calling release on the futures should be enough to free up memory. If memory issues persist, this may be a good place to start. + # client.run(gc.collect) #run garbage collection to free up memory ############################### # Step 2: Early Stopping @@ -717,6 +720,10 @@ def optimize(self): #done, cleanup futures for future in submitted_futures.keys(): future.cancel() + future.release() #release the future + + #I am not entirely sure if this is necessary. I believe that calling release on the futures should be enough to free up memory. If memory issues persist, this may be a good place to start. + # client.run(gc.collect) #run garbage collection to free up memory #checkpoint if self.population_file is not None: From 7c0e9477e7585613edfe4e03d504269d0905f792 Mon Sep 17 00:00:00 2001 From: perib Date: Fri, 1 Nov 2024 16:27:11 -0400 Subject: [PATCH 3/7] add gc --- tpot2/evolvers/steady_state_evolver.py | 4 ++-- tpot2/utils/eval_utils.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tpot2/evolvers/steady_state_evolver.py b/tpot2/evolvers/steady_state_evolver.py index a9270954..c7885ec6 100644 --- a/tpot2/evolvers/steady_state_evolver.py +++ b/tpot2/evolvers/steady_state_evolver.py @@ -508,7 +508,7 @@ def optimize(self): self.population.remove_invalid_from_population(column_names="Eval Error", invalid_value="TIMEOUT") #I am not entirely sure if this is necessary. I believe that calling release on the futures should be enough to free up memory. If memory issues persist, this may be a good place to start. - # client.run(gc.collect) #run garbage collection to free up memory + client.run(gc.collect) #run garbage collection to free up memory ############################### # Step 2: Early Stopping @@ -723,7 +723,7 @@ def optimize(self): future.release() #release the future #I am not entirely sure if this is necessary. I believe that calling release on the futures should be enough to free up memory. If memory issues persist, this may be a good place to start. - # client.run(gc.collect) #run garbage collection to free up memory + client.run(gc.collect) #run garbage collection to free up memory #checkpoint if self.population_file is not None: diff --git a/tpot2/utils/eval_utils.py b/tpot2/utils/eval_utils.py index 647bffdc..5a3a3fe5 100644 --- a/tpot2/utils/eval_utils.py +++ b/tpot2/utils/eval_utils.py @@ -228,7 +228,7 @@ def parallel_eval_objective_list(individual_list, #I am not entirely sure if this is necessary. I believe that calling release on the futures should be enough to free up memory. If memory issues persist, this may be a good place to start. - # client.run(gc.collect) #run garbage collection to free up memory + client.run(gc.collect) #run garbage collection to free up memory #break if timeout if global_timeout_triggered: @@ -252,7 +252,7 @@ def parallel_eval_objective_list(individual_list, submitted_inds.add(individual.unique_id()) #I am not entirely sure if this is necessary. I believe that calling release on the futures should be enough to free up memory. If memory issues persist, this may be a good place to start. - # client.run(gc.collect) #run garbage collection to free up memory + client.run(gc.collect) #run garbage collection to free up memory #collect remaining futures final_scores = [scores_dict[individual]["scores"] for individual in individual_list] From cf39dbdf7efad0d844b06987246140e35dc85d47 Mon Sep 17 00:00:00 2001 From: Pedro Ribeiro Date: Fri, 8 Nov 2024 18:24:56 -0500 Subject: [PATCH 4/7] Update eval_utils.py --- tpot2/utils/eval_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tpot2/utils/eval_utils.py b/tpot2/utils/eval_utils.py index 5a3a3fe5..37305bb6 100644 --- a/tpot2/utils/eval_utils.py +++ b/tpot2/utils/eval_utils.py @@ -228,7 +228,7 @@ def parallel_eval_objective_list(individual_list, #I am not entirely sure if this is necessary. I believe that calling release on the futures should be enough to free up memory. If memory issues persist, this may be a good place to start. - client.run(gc.collect) #run garbage collection to free up memory + #client.run(gc.collect) #run garbage collection to free up memory #break if timeout if global_timeout_triggered: @@ -252,7 +252,7 @@ def parallel_eval_objective_list(individual_list, submitted_inds.add(individual.unique_id()) #I am not entirely sure if this is necessary. I believe that calling release on the futures should be enough to free up memory. If memory issues persist, this may be a good place to start. - client.run(gc.collect) #run garbage collection to free up memory + #client.run(gc.collect) #run garbage collection to free up memory #collect remaining futures final_scores = [scores_dict[individual]["scores"] for individual in individual_list] From 12baf2e78cbfd9d1f3bc980ee2f1b5396fa73c05 Mon Sep 17 00:00:00 2001 From: Pedro Ribeiro Date: Fri, 8 Nov 2024 18:25:22 -0500 Subject: [PATCH 5/7] Update steady_state_evolver.py --- tpot2/evolvers/steady_state_evolver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tpot2/evolvers/steady_state_evolver.py b/tpot2/evolvers/steady_state_evolver.py index c7885ec6..c9a4714e 100644 --- a/tpot2/evolvers/steady_state_evolver.py +++ b/tpot2/evolvers/steady_state_evolver.py @@ -508,7 +508,7 @@ def optimize(self): self.population.remove_invalid_from_population(column_names="Eval Error", invalid_value="TIMEOUT") #I am not entirely sure if this is necessary. I believe that calling release on the futures should be enough to free up memory. If memory issues persist, this may be a good place to start. - client.run(gc.collect) #run garbage collection to free up memory + #client.run(gc.collect) #run garbage collection to free up memory ############################### # Step 2: Early Stopping @@ -723,7 +723,7 @@ def optimize(self): future.release() #release the future #I am not entirely sure if this is necessary. I believe that calling release on the futures should be enough to free up memory. If memory issues persist, this may be a good place to start. - client.run(gc.collect) #run garbage collection to free up memory + #client.run(gc.collect) #run garbage collection to free up memory #checkpoint if self.population_file is not None: From 44cc8fa2b01512bc52f9456faa5cf012ac51b2df Mon Sep 17 00:00:00 2001 From: Pedro Ribeiro Date: Fri, 8 Nov 2024 18:31:42 -0500 Subject: [PATCH 6/7] Update steady_state_evolver.py --- tpot2/evolvers/steady_state_evolver.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tpot2/evolvers/steady_state_evolver.py b/tpot2/evolvers/steady_state_evolver.py index c9a4714e..f077b05f 100644 --- a/tpot2/evolvers/steady_state_evolver.py +++ b/tpot2/evolvers/steady_state_evolver.py @@ -444,6 +444,7 @@ def optimize(self): print("Cancelled future (likely memory related)") scores = [np.nan for _ in range(len(self.objective_names))] eval_error = "INVALID" + client.run(gc.collect) else: #if the future is done and did not throw an error, get the scores try: scores = completed_future.result() From 073133b401f78571378d0dcff6239cf0f963127e Mon Sep 17 00:00:00 2001 From: Pedro Ribeiro Date: Fri, 8 Nov 2024 18:32:54 -0500 Subject: [PATCH 7/7] Update eval_utils.py --- tpot2/utils/eval_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tpot2/utils/eval_utils.py b/tpot2/utils/eval_utils.py index 37305bb6..bc308164 100644 --- a/tpot2/utils/eval_utils.py +++ b/tpot2/utils/eval_utils.py @@ -164,6 +164,7 @@ def parallel_eval_objective_list(individual_list, print("Cancelled future (likely memory related)") scores = [np.nan for _ in range(n_expected_columns)] eval_error = "INVALID" + client.run(gc.collect) else: #if the future is done and did not throw an error, get the scores try: scores = completed_future.result()