Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[runtime env] Fix Ray hangs when nonexistent conda environment is specified #28105 #34956

Merged
merged 13 commits into from
Aug 23, 2023
36 changes: 23 additions & 13 deletions python/ray/_private/runtime_env/conda.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
create_conda_env_if_needed,
delete_conda_env,
get_conda_activate_commands,
get_conda_env_list,
)
from ray._private.runtime_env.context import RuntimeEnvContext
from ray._private.runtime_env.packaging import Protocol, parse_uri
from ray._private.runtime_env.plugin import RuntimeEnvPlugin
from ray._private.runtime_env.validation import parse_and_validate_conda
from ray._private.utils import (
get_directory_size_bytes,
get_master_wheel_url,
Expand Down Expand Up @@ -217,12 +219,10 @@ def get_uri(runtime_env: Dict) -> Optional[str]:
"""Return `"conda://<hashed_dependencies>"`, or None if no GC required."""
conda = runtime_env.get("conda")
if conda is not None:
if isinstance(conda, str):
if isinstance(conda, str) or isinstance(conda, dict):
# User-preinstalled conda env. We don't garbage collect these, so
# we don't track them with URIs.
rkooo567 marked this conversation as resolved.
Show resolved Hide resolved
uri = None
elif isinstance(conda, dict):
uri = "conda://" + _get_conda_env_hash(conda_dict=conda)
uri = f"conda://{_get_conda_env_hash(conda_dict=conda)}"
else:
raise TypeError(
"conda field received by RuntimeEnvAgent must be "
Expand Down Expand Up @@ -319,18 +319,28 @@ async def create(
context: RuntimeEnvContext,
logger: logging.Logger = default_logger,
) -> int:
if uri is None:
# The "conda" field is the name of an existing conda env, so no
# need to create one.
# TODO(architkulkarni): Try "conda activate" here to see if the
# env exists, and raise an exception if it doesn't.
if not runtime_env.has_conda():
return 0

# The create method is still synchronous, so to avoid blocking the
# event loop we need to run this function in another thread.
# TODO(Catch-Bull): Refactor create into an async process so that it
# can run in the current loop.
def _create():
result = parse_and_validate_conda(runtime_env.get("conda"))
rkooo567 marked this conversation as resolved.
Show resolved Hide resolved

if isinstance(result, str):
# The conda env name is given.
# In this case, we only verify if the given
# conda env exists.
conda_env_list = get_conda_env_list()
envs = [Path(env).name for env in conda_env_list]
if result not in envs:
raise ValueError(
f"The given conda environment '{result}' "
f"from the runtime env {runtime_env} doesn't "
"exist from the output of `conda env list --json`. "
"You can only specify an env that already exists. "
f"Please make sure to create an env {result} "
)
return 0

logger.debug(
"Setting up conda for runtime_env: " f"{runtime_env.serialize()}"
)
Expand Down
1 change: 1 addition & 0 deletions python/ray/_private/runtime_env/conda_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def create_conda_env_if_needed(
"""
if logger is None:
logger = logging.getLogger(__name__)

conda_path = get_conda_bin_executable("conda")
try:
exec_cmd([conda_path, "--help"], throw_on_error=False)
Expand Down
18 changes: 18 additions & 0 deletions python/ray/tests/test_runtime_env_conda_and_pip.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,24 @@ def f():
assert ray.get(f.remote()) == 0


def test_runtime_env_conda_not_exists_not_hang(shutdown_only):
    """Check that a missing conda env surfaces an error instead of hanging Ray.

    Ray is initialized with a runtime env naming a conda environment that
    does not exist. Each submitted task is then expected to fail with
    ``RuntimeEnvSetupError`` whose message mentions the
    ``conda env list --json`` lookup.
    """
    ray.init(runtime_env={"conda": "env_which_does_not_exist"})

    @ray.remote
    def f():
        return 1

    expected_fragment = "doesn't exist from the output of `conda env list --json`"
    pending = [f.remote() for _ in range(5)]

    for task_ref in pending:
        with pytest.raises(ray.exceptions.RuntimeEnvSetupError) as exc_info:
            ray.get(task_ref)
        # Inspect the captured exception only after the `with` block exits;
        # nothing placed after the raising call inside the block would run.
        assert expected_fragment in str(exc_info.value)  # noqa


if __name__ == "__main__":
if os.environ.get("PARALLEL_CI"):
sys.exit(pytest.main(["-n", "auto", "--boxed", "-vs", __file__]))
Expand Down