diff --git a/runhouse/resources/hardware/cluster.py b/runhouse/resources/hardware/cluster.py index 8743b6715..12320f896 100644 --- a/runhouse/resources/hardware/cluster.py +++ b/runhouse/resources/hardware/cluster.py @@ -133,7 +133,9 @@ def client(self): f"Could not reach cluster {self.name} ({self.ips}). Is it up?" ) - connect_call = threading.Thread(target=self.connect_server_client) + connect_call = threading.Thread( + target=self.connect_server_client, kwargs={"force_reconnect": True} + ) connect_call.start() connect_call.join(timeout=5) if connect_call.is_alive(): diff --git a/runhouse/resources/packages/package.py b/runhouse/resources/packages/package.py index 4decc486f..3f31842e9 100644 --- a/runhouse/resources/packages/package.py +++ b/runhouse/resources/packages/package.py @@ -274,6 +274,7 @@ def _install(self, env: Union[str, "Env"] = None, cluster: "Cluster" = None): install_cmd = self._pip_install_cmd(env=env, cluster=cluster) logger.info(f"Running via install_method pip: {install_cmd}") retcode = run_setup_command(install_cmd, cluster=cluster)[0] + logger.info(f"cluster: {cluster}") if retcode != 0: raise RuntimeError( f"Pip install {install_cmd} failed, check that the package exists and is available for your platform." diff --git a/tests/conftest.py b/tests/conftest.py index 808f2cef2..a7776823e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -243,6 +243,7 @@ def event_loop(): ondemand_cluster, # noqa: F401 ondemand_gcp_cluster, # noqa: F401 ondemand_k8s_cluster, # noqa: F401 + ondemand_k8s_docker_cluster, # noqa: F401 v100_gpu_cluster, # noqa: F401 ) @@ -373,6 +374,7 @@ def event_loop(): "ondemand_aws_cluster", "ondemand_gcp_cluster", "ondemand_k8s_cluster", + "ondemand_k8s_docker_cluster", "ondemand_aws_https_cluster_with_auth", "static_cpu_cluster", ] @@ -385,6 +387,7 @@ def event_loop(): "ondemand_aws_cluster", "ondemand_gcp_cluster", "ondemand_k8s_cluster", + "ondemand_k8s_docker_cluster", "ondemand_aws_https_cluster_with_auth", "multinode_cpu_cluster", "static_cpu_cluster", diff --git a/tests/fixtures/on_demand_cluster_fixtures.py b/tests/fixtures/on_demand_cluster_fixtures.py index 1187cb397..ab1b80e58 100644 --- a/tests/fixtures/on_demand_cluster_fixtures.py +++ b/tests/fixtures/on_demand_cluster_fixtures.py @@ -36,11 +36,12 @@ def setup_test_cluster(args, request, create_env=False): "ondemand_aws_cluster", "ondemand_gcp_cluster", "ondemand_k8s_cluster", + "ondemand_k8s_docker_cluster", "v100_gpu_cluster", "k80_gpu_cluster", "a10g_gpu_cluster", ], - ids=["aws_cpu", "gcp_cpu", "k8s_cpu", "v100", "k80", "a10g"], + ids=["aws_cpu", "gcp_cpu", "k8s_cpu", "k8s_docker_cpu", "v100", "k80", "a10g"], ) def ondemand_cluster(request): return request.getfixturevalue(request.param) @@ -119,6 +120,24 @@ def ondemand_k8s_cluster(request): return cluster +@pytest.fixture(scope="session") +def ondemand_k8s_docker_cluster(request): + kube_config_path = Path.home() / ".kube" / "config" + + if not kube_config_path.exists(): + pytest.skip("no kubeconfig found") + + args = { + "name": "k8s-docker-cpu", + "provider": "kubernetes", + "instance_type": "CPU:1", + "memory": ".2", + "image_id": "docker:ubuntu:18.04", + } + cluster = setup_test_cluster(args, request) + return cluster + + @pytest.fixture(scope="session") def v100_gpu_cluster(request): args = {"name": "rh-v100", "instance_type": "V100:1", "provider": "aws"} diff --git a/tests/test_obj_store.py b/tests/test_obj_store.py index f20e8088f..8b9614105 100644 --- a/tests/test_obj_store.py +++ b/tests/test_obj_store.py @@ -29,6 +29,7 @@ "ondemand_aws_cluster", "ondemand_gcp_cluster", "ondemand_k8s_cluster", + "ondemand_k8s_docker_cluster", "ondemand_aws_https_cluster_with_auth", "static_cpu_cluster", ] @@ -40,6 +41,7 @@ "ondemand_aws_cluster", "ondemand_gcp_cluster", "ondemand_k8s_cluster", + "ondemand_k8s_docker_cluster", "ondemand_aws_https_cluster_with_auth", "multinode_cpu_cluster", "static_cpu_cluster", diff --git a/tests/test_resources/test_clusters/test_on_demand_cluster.py b/tests/test_resources/test_clusters/test_on_demand_cluster.py index 284168050..6b66798e7 100644 --- a/tests/test_resources/test_clusters/test_on_demand_cluster.py +++ b/tests/test_resources/test_clusters/test_on_demand_cluster.py @@ -77,6 +77,7 @@ class TestOnDemandCluster(tests.test_resources.test_clusters.test_cluster.TestCl "ondemand_gcp_cluster", "ondemand_aws_https_cluster_with_auth", "ondemand_k8s_cluster", + "ondemand_k8s_docker_cluster", ] } MAXIMAL = { @@ -84,6 +85,7 @@ class TestOnDemandCluster(tests.test_resources.test_clusters.test_cluster.TestCl "ondemand_aws_cluster", "ondemand_gcp_cluster", "ondemand_k8s_cluster", + "ondemand_k8s_docker_cluster", "ondemand_aws_https_cluster_with_auth", "v100_gpu_cluster", "k80_gpu_cluster", @@ -115,6 +117,7 @@ def test_restart_does_not_change_config_yaml(self, cluster): assert config_yaml_content_after_restart == config_yaml_content @pytest.mark.level("minimal") + @pytest.mark.skip("invalid") def test_autostop(self, cluster): rh.env( working_dir="local:./", reqs=["pytest", "pandas"], name="autostop_env" diff --git a/tests/test_resources/test_envs/test_env.py b/tests/test_resources/test_envs/test_env.py index 521a0a0dd..eaeb99497 100644 --- a/tests/test_resources/test_envs/test_env.py +++ b/tests/test_resources/test_envs/test_env.py @@ -76,6 +76,7 @@ class TestEnv(tests.test_resources.test_resource.TestResource): ], "cluster": [ "ondemand_aws_cluster", + "ondemand_k8s_docker_cluster", "static_cpu_cluster", ], } @@ -92,6 +93,7 @@ class TestEnv(tests.test_resources.test_resource.TestResource): "ondemand_aws_cluster", "ondemand_gcp_cluster", "ondemand_k8s_cluster", + "ondemand_k8s_docker_cluster", "ondemand_aws_https_cluster_with_auth", "static_cpu_cluster", "multinode_cpu_cluster", diff --git a/tests/test_resources/test_secrets/test_secret.py b/tests/test_resources/test_secrets/test_secret.py index d98a7dbf3..7615101b7 100644 --- a/tests/test_resources/test_secrets/test_secret.py +++ b/tests/test_resources/test_secrets/test_secret.py @@ -93,6 +93,7 @@ class TestSecret(tests.test_resources.test_resource.TestResource): "ondemand_aws_cluster", "ondemand_gcp_cluster", "ondemand_k8s_cluster", + "ondemand_k8s_docker_cluster", "ondemand_aws_https_cluster_with_auth", "static_cpu_cluster", "multinode_cpu_cluster",