Skip to content

Commit

Permalink
[bugfix]set shm_size to 8G if not specified
Browse files Browse the repository at this point in the history
  • Loading branch information
charlieyl committed Dec 11, 2024
1 parent 02b87f4 commit 9ff4a56
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
4 changes: 2 additions & 2 deletions python/fedml/computing/scheduler/comm_utils/job_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def occupy_gpu_ids(self, run_id, request_gpu_num, device_id, inner_id=None,
logging.info(f"Cache not set yet, fetching realtime available GPU Ids: {realtime_available_gpu_ids}")

# If the available GPU list is not in the cache, set it to the current system available GPU list
- if available_gpu_ids is None or available_gpu_ids == []:
+ if available_gpu_ids is None:
# Get realtime GPU availability list from the system
available_gpu_ids = realtime_available_gpu_ids
else:
Expand Down Expand Up @@ -333,7 +333,7 @@ def get_available_gpu_id_list(device_id):
device_id)

# If the available GPU list is not in the cache, set it to the current system available GPU list
- if available_gpu_ids is None or available_gpu_ids == []:
+ if available_gpu_ids is None:
# Get realtime GPU availability list from the system
gpu_ids = JobRunnerUtils.get_realtime_gpu_available_ids().copy()
ComputeCacheManager.get_instance().get_gpu_cache().set_device_available_gpu_ids(device_id, gpu_ids)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,10 @@ def parse_resource_related_config(config, gpu_num_frm_platform=0):
num_gpus = 0

shm_size = config.get('shm_size', None)
+ # set shm_size to 8G if not specified
+ if not shm_size:
+ shm_size = "8G"
+
storage_opt = config.get('storage_opt', None)
tmpfs = config.get('tmpfs', None)
cpus = config.get('cpus', None)
Expand Down

0 comments on commit 9ff4a56

Please sign in to comment.