diff --git a/README.md b/README.md
index 56eb9ca4f..b6b4ad2ce 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ def sd_generate(prompt, **inference_kwargs):
 
 if __name__ == "__main__":
     gpu = rh.cluster(name="rh-a10x", instance_type="A10G:1", provider="aws")
-    sd_env = rh.env(reqs=["torch", "transformers", "diffusers"], name="sd_generate", working_dir="./")
+    sd_env = rh.env(reqs=["torch", "transformers", "diffusers"], name="sd_generate")
 
     # Deploy the function and environment (syncing over local code changes and installing dependencies)
     remote_sd_generate = rh.function(sd_generate).to(gpu, env=sd_env)
diff --git a/examples/llama2-13b-ec2/llama2_ec2.py b/examples/llama2-13b-ec2/llama2_ec2.py
index 658427841..c6bf1bb19 100644
--- a/examples/llama2-13b-ec2/llama2_ec2.py
+++ b/examples/llama2-13b-ec2/llama2_ec2.py
@@ -110,7 +110,6 @@ def predict(self, prompt_text, **inf_kwargs):
         ],
         secrets=["huggingface"],  # Needed to download Llama 2
         name="llama2inference",
-        working_dir="./",
     )
 
     # Finally, we define our module and run it on the remote cluster. We construct it normally and then call
diff --git a/examples/llama2-with-tgi-aws-inferentia2/tgi_llama2_inferentia.py b/examples/llama2-with-tgi-aws-inferentia2/tgi_llama2_inferentia.py
index 5e24ad365..ecc1c4c45 100644
--- a/examples/llama2-with-tgi-aws-inferentia2/tgi_llama2_inferentia.py
+++ b/examples/llama2-with-tgi-aws-inferentia2/tgi_llama2_inferentia.py
@@ -213,7 +213,6 @@ def restart_container(self):
         name="tgi_env",
         reqs=["docker"],
         secrets=["huggingface"],
-        working_dir="./",
     )
 
     # Finally, we define our module and run it on the remote cluster. We construct it normally and then call
diff --git a/examples/llama2-with-tgi-ec2/tgi_llama_ec2.py b/examples/llama2-with-tgi-ec2/tgi_llama_ec2.py
index 222503962..ee4d1fa81 100644
--- a/examples/llama2-with-tgi-ec2/tgi_llama_ec2.py
+++ b/examples/llama2-with-tgi-ec2/tgi_llama_ec2.py
@@ -184,7 +184,6 @@ def restart_container(self):
         name="tgi_env",
         reqs=["docker", "torch", "transformers"],
         secrets=["huggingface"],
-        working_dir="./",
     )
 
     # Finally, we define our module and run it on the remote cluster. We construct it normally and then call
diff --git a/examples/llama3-8b-ec2/llama3_ec2.py b/examples/llama3-8b-ec2/llama3_ec2.py
index 35ea6a608..bdf6b6a29 100644
--- a/examples/llama3-8b-ec2/llama3_ec2.py
+++ b/examples/llama3-8b-ec2/llama3_ec2.py
@@ -126,7 +126,6 @@ def predict(self, prompt_text, **inf_kwargs):
         ],
         secrets=["huggingface"],  # Needed to download Llama 3 from HuggingFace
         name="llama3inference",
-        working_dir="./",
     )
 
     # Finally, we define our module and run it on the remote cluster. We construct it normally and then call
diff --git a/examples/llama3-8b-tgi-ec2/llama3_tgi_ec2.py b/examples/llama3-8b-tgi-ec2/llama3_tgi_ec2.py
index 193b1a4ba..ddbd9f494 100644
--- a/examples/llama3-8b-tgi-ec2/llama3_tgi_ec2.py
+++ b/examples/llama3-8b-tgi-ec2/llama3_tgi_ec2.py
@@ -175,7 +175,6 @@ def deploy(self):
         name="tgi_env",
         reqs=["docker", "torch", "transformers"],
         secrets=["huggingface"],
-        working_dir="./",
     )
 
     # Finally, we define our module and run it on the remote cluster. We construct it normally and then call
diff --git a/examples/llama3-vllm-gcp/llama3_vllm_gcp.py b/examples/llama3-vllm-gcp/llama3_vllm_gcp.py
index 92e07842d..1c719e552 100644
--- a/examples/llama3-vllm-gcp/llama3_vllm_gcp.py
+++ b/examples/llama3-vllm-gcp/llama3_vllm_gcp.py
@@ -120,7 +120,6 @@ async def main():
         reqs=["vllm==0.2.7"],  # >=0.3.0 causes Pydantic version error
         secrets=["huggingface"],  # Needed to download Llama 3 from HuggingFace
         name="llama3inference",
-        working_dir="./",
     )
 
     # Finally, we define our module and run it on the remote cluster. We construct it normally and then call
diff --git a/examples/mistral-aws-inferentia2/mistral_inferentia.py b/examples/mistral-aws-inferentia2/mistral_inferentia.py
index f58eb71e1..b5fb32c1d 100644
--- a/examples/mistral-aws-inferentia2/mistral_inferentia.py
+++ b/examples/mistral-aws-inferentia2/mistral_inferentia.py
@@ -151,15 +151,6 @@ def generate(self, messages: list, return_tensors="pt", sequence_length=256):
         ],
     )
 
-    # Next, we define the environment for our module. This includes the required dependencies that need
-    # to be installed on the remote machine.
-    #
-    # Learn more in the [Runhouse docs on envs](/docs/tutorials/api-envs).
-    env = rh.env(
-        name="instruct_env",
-        working_dir="./",
-    )
-
     # Finally, we define our module and run it on the remote cluster. We construct it normally and then call
     # `get_or_to` to run it on the remote cluster. Using `get_or_to` allows us to load the exiting Module
     # by the name `mistral-instruct` if it was already put on the cluster. If we want to update the module each
@@ -168,7 +159,7 @@ def generate(self, messages: list, return_tensors="pt", sequence_length=256):
     # Note that we also pass the `env` object to the `get_or_to` method, which will ensure that the environment is
     # set up on the remote machine before the module is run.
     remote_instruct_model = MistralInstruct().get_or_to(
-        cluster, env=env, name="mistral-instruct"
+        cluster, name="mistral-instruct"
     )
 
     # ## Loading and prompting the model
diff --git a/examples/mistral-with-tgi-ec2/tgi_mistral_ec2.py b/examples/mistral-with-tgi-ec2/tgi_mistral_ec2.py
index d1086894e..f83a362ed 100644
--- a/examples/mistral-with-tgi-ec2/tgi_mistral_ec2.py
+++ b/examples/mistral-with-tgi-ec2/tgi_mistral_ec2.py
@@ -159,7 +159,6 @@ def restart_container(self):
     env = rh.env(
         name="tgi_env",
         reqs=["docker", "openai", "torch", "transformers"],
-        working_dir="./",
     )
 
     # Finally, we define our module and run it on the remote cluster. We construct it normally and then call
diff --git a/runhouse/resources/hardware/on_demand_cluster.py b/runhouse/resources/hardware/on_demand_cluster.py
index f8513709a..0f5f307c6 100644
--- a/runhouse/resources/hardware/on_demand_cluster.py
+++ b/runhouse/resources/hardware/on_demand_cluster.py
@@ -148,7 +148,13 @@ def docker_user(self) -> str:
         if self._docker_user:
             return self._docker_user
 
-        if not self.image_id:
+        # TODO detect whether this is a k8s cluster properly, and handle the user setting / SSH properly
+        # (e.g. SkyPilot's new KubernetesCommandRunner)
+        if (
+            not self.image_id
+            or "docker:" not in self.image_id
+            or self.provider == "kubernetes"
+        ):
             return None
 
         from runhouse.resources.hardware.sky_ssh_runner import get_docker_user
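For reference, a minimal sketch (adapted from the README hunk above, not part of the diff) of what a caller writes after this change: `working_dir="./"` is simply dropped from `rh.env(...)`, and functions are still sent to the cluster with the env attached. The function body below is a placeholder, not the real Stable Diffusion pipeline.

```python
import runhouse as rh


def sd_generate(prompt, **inference_kwargs):
    # Placeholder for the Stable Diffusion pipeline shown in the README example.
    raise NotImplementedError


if __name__ == "__main__":
    gpu = rh.cluster(name="rh-a10x", instance_type="A10G:1", provider="aws")
    # No working_dir="./" anymore; only the reqs and name are passed.
    sd_env = rh.env(reqs=["torch", "transformers", "diffusers"], name="sd_generate")

    # Deploy the function and environment (local code changes are still synced over).
    remote_sd_generate = rh.function(sd_generate).to(gpu, env=sd_env)
```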