From 71ce00cc3dde940a0c8344c78e2283a43eb84b31 Mon Sep 17 00:00:00 2001 From: Abhishree Thittenamane <47577437+athitten@users.noreply.github.com> Date: Wed, 21 Feb 2024 21:53:11 -0800 Subject: [PATCH] Add settings to suppress bf16 compile errors in CI on V100 (#8481) * Add settings to suppress bf16 compile errors in CI on V100 Signed-off-by: Abhishree * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Abhishree Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- examples/nlp/language_modeling/megatron_gpt_pretraining.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/nlp/language_modeling/megatron_gpt_pretraining.py b/examples/nlp/language_modeling/megatron_gpt_pretraining.py index 44834b35a0cf..80158446d95a 100644 --- a/examples/nlp/language_modeling/megatron_gpt_pretraining.py +++ b/examples/nlp/language_modeling/megatron_gpt_pretraining.py @@ -13,6 +13,8 @@ # limitations under the License. +# To suppress BF16 compile-related errors in CI runs on Turing/V100 GPUs +import torch._dynamo import torch.multiprocessing as mp from omegaconf.omegaconf import OmegaConf, open_dict @@ -22,6 +24,8 @@ from nemo.utils import logging from nemo.utils.exp_manager import exp_manager +torch._dynamo.config.suppress_errors = True + mp.set_start_method("spawn", force=True)