From e24217cfabd61889192f2b7c306ca56e069b0b11 Mon Sep 17 00:00:00 2001 From: Rohin Bhasin Date: Wed, 3 Jul 2024 12:42:54 -0400 Subject: [PATCH] Refactor `Package._install` to have clearer logic. --- runhouse/resources/packages/package.py | 228 ++++++++++-------- .../test_resources/test_data/test_package.py | 35 +-- 2 files changed, 152 insertions(+), 111 deletions(-) diff --git a/runhouse/resources/packages/package.py b/runhouse/resources/packages/package.py index b2318089e..5e7ac70fb 100644 --- a/runhouse/resources/packages/package.py +++ b/runhouse/resources/packages/package.py @@ -80,62 +80,103 @@ def __str__(self): return f"Package: {self.install_target.path}" return f"Package: {self.install_target}" - def _install_cmd(self, cluster: "Cluster" = None): - install_cmd = "" + @staticmethod + def _prepend_python_executable( + install_cmd: str, env: Union[str, "Env"] = None, cluster: "Cluster" = None + ): + return ( + f"python3 -m {install_cmd}" + if cluster or env + else f"{sys.executable} -m {install_cmd}" + ) + + @staticmethod + def _prepend_env_command(install_cmd: str, env: Union[str, "Env"] = None): + if env: + from runhouse.resources.envs.utils import _get_env_from + + env = _get_env_from(env) + install_cmd = env._full_command(install_cmd) + + return install_cmd + + def _pip_install_cmd( + self, env: Union[str, "Env"] = None, cluster: "Cluster" = None + ): install_args = f" {self.install_args}" if self.install_args else "" + if isinstance(self.install_target, Folder): + install_cmd = f"{self.install_target.local_path}" + install_args + else: + install_cmd = self.install_target + install_args + + install_cmd = f"pip install {self._install_cmd_for_torch(install_cmd, cluster)}" + install_cmd = self._prepend_python_executable( + install_cmd, cluster=cluster, env=env + ) + install_cmd = self._prepend_env_command(install_cmd, env=env) + return install_cmd + def _conda_install_cmd( + self, env: Union[str, "Env"] = None, cluster: "Cluster" = None + ): + install_args = f" {self.install_args}" if self.install_args else "" if isinstance(self.install_target, Folder): - # TODO [DG] Revisit for pip: Would be nice if we could use -e by default, but importlib on the rpc server - # isn't finding the package right after its installed. - # if (Path(local_path) / 'setup.py').exists(): - # install_cmd = f'-e {local_path}' + install_args + install_cmd = f"{self.install_target.local_path}" + install_args + else: + install_cmd = self.install_target + install_args + + install_cmd = f"conda install -y {install_cmd}" + install_cmd = self._prepend_env_command(install_cmd, env=env) + install_conda(cluster=cluster) + return install_cmd + + def _reqs_install_cmd( + self, env: Union[str, "Env"] = None, cluster: "Cluster" = None + ): + install_args = f" {self.install_args}" if self.install_args else "" + if not isinstance(self.install_target, Folder): + install_cmd = self.install_target + install_args + else: path = self.install_target.local_path - if self.install_method in ["pip", "conda"]: - install_cmd = f"{path}" + install_args - elif self.install_method == "reqs": - if not cluster: - reqs_path = f"{path}/requirements.txt" - if not Path(reqs_path).expanduser().exists(): - return None - with open(reqs_path) as f: - reqs = f.readlines() - else: - if not self.install_target.system == cluster: - install_target = self.to(cluster).install_target - else: - install_target = self.install_target + # Read reqs_path and reqs from local if not cluster + if not cluster: + reqs_path = f"{path}/requirements.txt" + if not Path(reqs_path).expanduser().exists(): + return None - if not install_target.exists_in_system(): - return None - elif "requirements.txt" not in install_target.ls(full_paths=False): - return None + with open(reqs_path) as f: + reqs_list = f.readlines() - reqs = ( - install_target.get("requirements.txt", mode="r") - .strip("\n") - .split("\n") - ) - reqs_path = f"{install_target.path}/requirements.txt" + # Otherwise, make sure the target folder is on the cluster, + # and read reqs from the cluster + else: + if not self.install_target.system == cluster: + install_target = self.to(cluster).install_target + else: + install_target = self.install_target - install_cmd = self._requirements_txt_install_cmd( - path=reqs_path, - reqs=reqs, - args=install_args, - cluster=cluster, + if not install_target.exists_in_system(): + return None + elif "requirements.txt" not in install_target.ls(full_paths=False): + return None + + reqs_list = ( + install_target.get("requirements.txt", mode="r") + .strip("\n") + .split("\n") ) - else: - install_cmd = self.install_target + install_args + reqs_path = f"{install_target.path}/requirements.txt" - if self.install_method == "pip": - install_cmd = ( - f"pip install {self._install_cmd_for_torch(install_cmd, cluster)}" + install_cmd = self._reqs_install_cmd_for_torch( + reqs_path, reqs_list, install_args, cluster=cluster ) - elif self.install_method == "reqs": - install_cmd = f"pip install {install_cmd}" - elif self.install_method == "conda": - install_cmd = f"conda install -y {install_cmd}" + install_cmd = f"pip install {install_cmd}" + install_cmd = self._prepend_python_executable( + install_cmd, env=env, cluster=cluster + ) + install_cmd = self._prepend_env_command(install_cmd, env=env) return install_cmd def _install(self, env: Union[str, "Env"] = None, cluster: "Cluster" = None): @@ -148,12 +189,24 @@ def _install(self, env: Union[str, "Env"] = None, cluster: "Cluster" = None): """ logger.info(f"Installing {str(self)} with method {self.install_method}.") - install_cmd = self._install_cmd(cluster=cluster) if self.install_method == "pip": - self._pip_install(install_cmd, env, cluster=cluster) + install_cmd = self._pip_install_cmd(env=env, cluster=cluster) + logger.info(f"Running via install_method pip: {install_cmd}") + retcode = run_setup_command(install_cmd, cluster=cluster)[0] + if retcode != 0: + raise RuntimeError( + f"Pip install {install_cmd} failed, check that the package exists and is available for your platform." + ) elif self.install_method == "conda": - self._conda_install(install_cmd, env, cluster=cluster) + install_cmd = self._conda_install_cmd(env=env, cluster=cluster) + logger.info(f"Running via install_method conda: {install_cmd}") + retcode = run_setup_command(install_cmd, cluster=cluster)[0] + if retcode != 0: + raise RuntimeError( + f"Conda install {install_cmd} failed, check that the package exists and is " + "available for your platform." + ) elif self.install_method in ["reqs", "local"]: if isinstance(self.install_target, Folder): if not cluster: @@ -162,36 +215,40 @@ def _install(self, env: Union[str, "Env"] = None, cluster: "Cluster" = None): path = self.install_target.path else: path = self.to(cluster).install_target.path - install_cmd = self._install_cmd(cluster=cluster) if not path: return - if self.install_method == "reqs" and install_cmd: - logger.info( - f"pip installing {path}/requirements.txt with: {install_cmd}" - ) - self._pip_install(install_cmd, env, cluster=cluster) - else: - logger.info(f"{path}/requirements.txt not found, skipping") + if self.install_method == "reqs": + install_cmd = self._reqs_install_cmd(env=env, cluster=cluster) + if install_cmd: + logger.info(f"Running via install_method reqs: {install_cmd}") + retcode = run_setup_command(install_cmd, cluster=cluster)[0] + if retcode != 0: + raise RuntimeError( + f"Reqs install {install_cmd} failed, check that the package exists and is available for your platform." + ) + else: + logger.info( + f"{path}/requirements.txt not found, skipping reqs install" + ) sys.path.append(path) if not cluster else run_setup_command( f"export PATH=$PATH;{path}", cluster=cluster ) - elif ( - not cluster - and Path(self.install_target).resolve().expanduser().exists() - ): - sys.path.append(str(Path(self.install_target).resolve().expanduser())) + elif not cluster: + if Path(self.install_target).resolve().expanduser().exists(): + sys.path.append( + str(Path(self.install_target).resolve().expanduser()) + ) + else: + raise ValueError( + f"install_target {self.install_target} must be a Folder or a path to a directory for install_method {self.install_method}" + ) else: raise ValueError( - f"install_target must be a Folder or a path to a directory for " - f"install_method {self.install_method}" + f"If cluster is provided, install_target must be a Folder for install_method {self.install_method}" ) - # elif self.install_method == 'unpickle': - # # unpickle the functions and make them importable - # with self.get('functions.pickle') as f: - # sys.modules[self.name] = pickle.load(f) else: raise ValueError( f"Unknown install_method {self.install_method}. Try using cluster.run() or to install instead." @@ -200,24 +257,24 @@ def _install(self, env: Union[str, "Env"] = None, cluster: "Cluster" = None): # ---------------------------------- # Torch Install Helpers # ---------------------------------- - def _requirements_txt_install_cmd(self, path, reqs, args="", cluster=None): + def _reqs_install_cmd_for_torch( + self, reqs_path, reqs_list, install_args="", cluster=None + ): """Read requirements from file, append --index-url and --extra-index-url where relevant for torch packages, and return list of formatted packages.""" # if torch extra index url is already defined by the user or torch isn't a req, directly pip install reqs file - if not [req for req in reqs if "torch" in req]: - return f"-r {path}" + args + if not any("torch" in req for req in reqs_list): + return f"-r {reqs_path}" + install_args cuda_version_or_cpu = detect_cuda_version_or_cpu(cluster=cluster) - for req in reqs: + for req in reqs_list: if ( "--index-url" in req or "--extra-index-url" in req ) and "pytorch.org" in req: - return f"-r {path}" + args + return f"-r {reqs_path}" + install_args # add extra-index-url for torch if not found - return ( - f"-r {path} --extra-index-url {self._torch_index_url(cuda_version_or_cpu)}" - ) + return f"-r {reqs_path} --extra-index-url {self._torch_index_url(cuda_version_or_cpu)}" def _install_cmd_for_torch(self, install_cmd, cluster=None): """Return the correct formatted pip install command for the torch package(s) provided.""" @@ -284,19 +341,6 @@ def _packages_to_install_from_cmd(install_cmd: str): def _pip_install( install_cmd: str, env: Union[str, "Env"] = "", cluster: "Cluster" = None ): - """Run pip install.""" - install_cmd = ( - f"python3 -m {install_cmd}" - if cluster or env - else f"{sys.executable} -m {install_cmd}" - ) - - if env: - from runhouse.resources.envs.utils import _get_env_from - - env = _get_env_from(env) - install_cmd = env._full_command(install_cmd) - retcode = run_setup_command(install_cmd, cluster=cluster)[0] if retcode != 0: raise RuntimeError( @@ -307,16 +351,6 @@ def _pip_install( def _conda_install( install_cmd: str, env: Union[str, "Env"] = "", cluster: "Cluster" = None ): - """Run conda install.""" - if env: - if isinstance(env, str): - from runhouse.resources.envs import Env - - env = Env.from_name(env) - install_cmd = env.full_command(install_cmd) - - install_conda() - retcode = run_setup_command(install_cmd, cluster=cluster)[0] if retcode != 0: raise RuntimeError( diff --git a/tests/test_resources/test_data/test_package.py b/tests/test_resources/test_data/test_package.py index b45aa1205..36d2074f1 100644 --- a/tests/test_resources/test_data/test_package.py +++ b/tests/test_resources/test_data/test_package.py @@ -1,3 +1,4 @@ +import sys from pathlib import Path import pytest @@ -67,32 +68,38 @@ def test_from_string(self, pkg_str): # --------- test install command --------- @pytest.mark.level("unit") def test_pip_install_cmd(self, pip_package): - assert pip_package._install_cmd() == f"pip install {pip_package.install_target}" + assert ( + pip_package._pip_install_cmd() + == f"{sys.executable} -m pip install {pip_package.install_target}" + ) @pytest.mark.level("unit") def test_conda_install_cmd(self, conda_package): assert ( - conda_package._install_cmd() + conda_package._conda_install_cmd() == f"conda install -y {conda_package.install_target}" ) @pytest.mark.level("unit") def test_reqs_install_cmd(self, reqs_package): assert ( - reqs_package._install_cmd() - == f"pip install -r {reqs_package.install_target.local_path}/requirements.txt" + reqs_package._reqs_install_cmd() + == f"{sys.executable} -m pip install -r {reqs_package.install_target.local_path}/requirements.txt" ) @pytest.mark.level("unit") def test_git_install_cmd(self, git_package): - assert git_package._install_cmd() == f"pip install {git_package.install_target}" + assert ( + git_package._pip_install_cmd() + == f"{sys.executable} -m pip install {git_package.install_target}" + ) # --------- test install on cluster --------- @pytest.mark.level("local") def test_pip_install(self, cluster, pip_package): assert ( - pip_package._install_cmd(cluster) - == f"pip install {pip_package.install_target}" + pip_package._pip_install_cmd(cluster=cluster) + == f"python3 -m pip install {pip_package.install_target}" ) # install through remote ssh @@ -105,7 +112,7 @@ def test_pip_install(self, cluster, pip_package): @pytest.mark.level("release") def test_conda_install(self, cluster, conda_package): assert ( - conda_package._install_cmd(cluster) + conda_package._conda_install_cmd(cluster=cluster) == f"conda install -y {conda_package.install_target}" ) @@ -120,10 +127,10 @@ def test_conda_install(self, cluster, conda_package): def test_remote_reqs_install(self, cluster, reqs_package): path = reqs_package.to(cluster).install_target.path - assert reqs_package._install_cmd(cluster=cluster) in [ - None, - f"pip install -r {path}/requirements.txt", - ] + assert ( + reqs_package._reqs_install_cmd(cluster=cluster) + == f"python3 -m pip install -r {path}/requirements.txt" + ) reqs_package._install(cluster=cluster) @pytest.mark.level("local") @@ -155,8 +162,8 @@ def test_torch_reqs_install_command(self): dummy_pkg = rh.Package.from_string(specifier="pip:dummy_package") assert ( - dummy_pkg._requirements_txt_install_cmd(test_reqs_file, reqs_lines) - == f"-r {test_reqs_file} --extra-index-url {rh.Package.TORCH_INDEX_URLS.get('cpu')}" + f"-r {test_reqs_file} --extra-index-url {rh.Package.TORCH_INDEX_URLS.get('cpu')}" + in dummy_pkg._reqs_install_cmd_for_torch(test_reqs_file, reqs_lines) ) test_reqs_file.unlink()