Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Set OMP_NUM_THREADS by default in Elastic #2569

Merged
merged 3 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions plugins/flytekit-kf-pytorch/flytekitplugins/kfpytorch/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,22 @@ def _execute(self, **kwargs) -> Any:
)
)

# If OMP_NUM_THREADS is not set, set it to 1 to avoid overloading the system.
# Doing so to copy the default behavior of torchrun.
# See https://github.com/pytorch/pytorch/blob/eea4ece256d74c6f25c1f4eab37b3f2f4aeefd4d/torch/distributed/run.py#L791
if "OMP_NUM_THREADS" not in os.environ and self.task_config.nproc_per_node > 1:
omp_num_threads = 1
logger.warning(
"\n*****************************************\n"
"Setting OMP_NUM_THREADS environment variable for each process to be "
"%s in default, to avoid your system being overloaded, "
"please further tune the variable for optimal performance in "
"your application as needed. \n"
"*****************************************",
omp_num_threads,
)
os.environ["OMP_NUM_THREADS"] = str(omp_num_threads)

config = LaunchConfig(
run_id=flytekit.current_context().execution_id.name,
min_nodes=self.min_nodes,
Expand Down
24 changes: 24 additions & 0 deletions plugins/flytekit-kf-pytorch/tests/test_elastic_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
from flytekit.configuration import SerializationSettings
from flytekit.exceptions.user import FlyteRecoverableException

@pytest.fixture(autouse=True, scope="function")
def restore_env():
original_env = os.environ.copy()
yield
os.environ.clear()
os.environ.update(original_env)

@dataclass
class Config(DataClassJsonMixin):
Expand Down Expand Up @@ -212,3 +218,21 @@ def test_task():
"ttlSecondsAfterFinished": 600,
"activeDeadlineSeconds": 36000,
}

@pytest.mark.parametrize("start_method", ["spawn", "fork"])
def test_omp_num_threads(start_method: str) -> None:
    """Test that the env var OMP_NUM_THREADS is set by default and not overwritten if set."""

    @task(task_config=Elastic(nnodes=1, nproc_per_node=2, start_method=start_method))
    def check_default_thread_count():
        # Elastic._execute should have defaulted the variable to "1".
        assert os.environ["OMP_NUM_THREADS"] == "1"

    check_default_thread_count()

    # A pre-existing value must be left untouched by the plugin.
    os.environ["OMP_NUM_THREADS"] = "42"

    @task(task_config=Elastic(nnodes=1, nproc_per_node=2, start_method=start_method))
    def check_preset_thread_count():
        assert os.environ["OMP_NUM_THREADS"] == "42"

    check_preset_thread_count()
2 changes: 1 addition & 1 deletion plugins/flytekit-kf-pytorch/tests/test_pytorch_task.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
pingsutw marked this conversation as resolved.
Show resolved Hide resolved
import pytest
from flytekitplugins.kfpytorch.task import CleanPodPolicy, Master, PyTorch, RestartPolicy, RunPolicy, Worker

Expand All @@ -17,7 +18,6 @@ def serialization_settings() -> SerializationSettings:
)
return settings


def test_pytorch_task(serialization_settings: SerializationSettings):
@task(
task_config=PyTorch(num_workers=10),
Expand Down
Loading