From d5c8243a67436d06050107ffea7af4ab4021e039 Mon Sep 17 00:00:00 2001 From: bghira Date: Wed, 21 Aug 2024 14:48:49 -0600 Subject: [PATCH 1/2] debugging --- train.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/train.py b/train.py index bbc0cba9..10d09b97 100644 --- a/train.py +++ b/train.py @@ -871,9 +871,9 @@ def main(): offload_param = accelerator.state.deepspeed_plugin.deepspeed_config[ "zero_optimization" ]["offload_param"] - accelerator.state.deepspeed_plugin.deepspeed_config["zero_optimization"][ - "offload_param" - ]["pin_memory"] = False + # accelerator.state.deepspeed_plugin.deepspeed_config["zero_optimization"][ + # "offload_param" + # ]["pin_memory"] = False if offload_param["device"] == "nvme": if offload_param["nvme_path"] == "none": if args.offload_param_path is None: From fc286c7b023d8894021669ef648d4fe010fca2bf Mon Sep 17 00:00:00 2001 From: bghira Date: Wed, 21 Aug 2024 15:18:56 -0600 Subject: [PATCH 2/2] deepspeed: pin ze memory --- train.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/train.py b/train.py index 10d09b97..5ee73fd0 100644 --- a/train.py +++ b/train.py @@ -871,9 +871,9 @@ def main(): offload_param = accelerator.state.deepspeed_plugin.deepspeed_config[ "zero_optimization" ]["offload_param"] - # accelerator.state.deepspeed_plugin.deepspeed_config["zero_optimization"][ - # "offload_param" - # ]["pin_memory"] = False + accelerator.state.deepspeed_plugin.deepspeed_config["zero_optimization"][ + "offload_param" + ]["pin_memory"] = True if offload_param["device"] == "nvme": if offload_param["nvme_path"] == "none": if args.offload_param_path is None: