From ba465ca5b4ffc7673ff90c7757c1ff9092f88e63 Mon Sep 17 00:00:00 2001 From: matt bowen Date: Wed, 17 Aug 2022 07:31:25 -0400 Subject: [PATCH] Reduce number of threads to reduce memory pressure (#1780) --- data/data-pipeline/data_pipeline/etl/runner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/runner.py b/data/data-pipeline/data_pipeline/etl/runner.py index 5e9230bb3..cc691aaa9 100644 --- a/data/data-pipeline/data_pipeline/etl/runner.py +++ b/data/data-pipeline/data_pipeline/etl/runner.py @@ -1,6 +1,7 @@ import importlib import concurrent.futures import typing +import os from data_pipeline.etl.score.etl_score import ScoreETL from data_pipeline.etl.score.etl_score_geo import GeoScoreETL @@ -76,8 +77,8 @@ def etl_runner(dataset_to_run: str = None) -> None: None """ dataset_list = _get_datasets_to_run(dataset_to_run) - - with concurrent.futures.ThreadPoolExecutor() as executor: + max_workers = min(32, os.cpu_count() + 4)//2 + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: futures = { executor.submit(_run_one_dataset, dataset=dataset) for dataset in dataset_list