
Commit

Remove working_dir from examples, and fix small bug in docker_user (#…
dongreenberg authored Jul 26, 2024
1 parent 2a392fc commit b0cdbfa
Showing 10 changed files with 9 additions and 19 deletions.
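Across the example scripts, the change is the same: the `working_dir="./"` argument is dropped from `rh.env(...)`. A minimal sketch of the resulting pattern, pieced together from the README diff below (the function body is elided; treat it as illustrative rather than the full example):

```python
import runhouse as rh

def sd_generate(prompt, **inference_kwargs):
    # Stable Diffusion inference body elided; unchanged by this commit.
    ...

if __name__ == "__main__":
    gpu = rh.cluster(name="rh-a10x", instance_type="A10G:1", provider="aws")

    # No working_dir="./" anymore when defining the env.
    sd_env = rh.env(reqs=["torch", "transformers", "diffusers"], name="sd_generate")

    # Deploy the function and environment (syncing over local code changes
    # and installing dependencies), as in the README.
    remote_sd_generate = rh.function(sd_generate).to(gpu, env=sd_env)
```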
2 changes: 1 addition & 1 deletion README.md
@@ -50,7 +50,7 @@ def sd_generate(prompt, **inference_kwargs):

if __name__ == "__main__":
gpu = rh.cluster(name="rh-a10x", instance_type="A10G:1", provider="aws")
-sd_env = rh.env(reqs=["torch", "transformers", "diffusers"], name="sd_generate", working_dir="./")
+sd_env = rh.env(reqs=["torch", "transformers", "diffusers"], name="sd_generate")

# Deploy the function and environment (syncing over local code changes and installing dependencies)
remote_sd_generate = rh.function(sd_generate).to(gpu, env=sd_env)
1 change: 0 additions & 1 deletion examples/llama2-13b-ec2/llama2_ec2.py
@@ -110,7 +110,6 @@ def predict(self, prompt_text, **inf_kwargs):
],
secrets=["huggingface"], # Needed to download Llama 2
name="llama2inference",
-working_dir="./",
)

# Finally, we define our module and run it on the remote cluster. We construct it normally and then call
@@ -213,7 +213,6 @@ def restart_container(self):
name="tgi_env",
reqs=["docker"],
secrets=["huggingface"],
-working_dir="./",
)

# Finally, we define our module and run it on the remote cluster. We construct it normally and then call
1 change: 0 additions & 1 deletion examples/llama2-with-tgi-ec2/tgi_llama_ec2.py
@@ -184,7 +184,6 @@ def restart_container(self):
name="tgi_env",
reqs=["docker", "torch", "transformers"],
secrets=["huggingface"],
-working_dir="./",
)

# Finally, we define our module and run it on the remote cluster. We construct it normally and then call
1 change: 0 additions & 1 deletion examples/llama3-8b-ec2/llama3_ec2.py
@@ -126,7 +126,6 @@ def predict(self, prompt_text, **inf_kwargs):
],
secrets=["huggingface"], # Needed to download Llama 3 from HuggingFace
name="llama3inference",
-working_dir="./",
)

# Finally, we define our module and run it on the remote cluster. We construct it normally and then call
1 change: 0 additions & 1 deletion examples/llama3-8b-tgi-ec2/llama3_tgi_ec2.py
@@ -175,7 +175,6 @@ def deploy(self):
name="tgi_env",
reqs=["docker", "torch", "transformers"],
secrets=["huggingface"],
-working_dir="./",
)

# Finally, we define our module and run it on the remote cluster. We construct it normally and then call
1 change: 0 additions & 1 deletion examples/llama3-vllm-gcp/llama3_vllm_gcp.py
@@ -120,7 +120,6 @@ async def main():
reqs=["vllm==0.2.7"], # >=0.3.0 causes Pydantic version error
secrets=["huggingface"], # Needed to download Llama 3 from HuggingFace
name="llama3inference",
-working_dir="./",
)

# Finally, we define our module and run it on the remote cluster. We construct it normally and then call
11 changes: 1 addition & 10 deletions examples/mistral-aws-inferentia2/mistral_inferentia.py
@@ -151,15 +151,6 @@ def generate(self, messages: list, return_tensors="pt", sequence_length=256):
],
)

-# Next, we define the environment for our module. This includes the required dependencies that need
-# to be installed on the remote machine.
-#
-# Learn more in the [Runhouse docs on envs](/docs/tutorials/api-envs).
-env = rh.env(
-    name="instruct_env",
-    working_dir="./",
-)

# Finally, we define our module and run it on the remote cluster. We construct it normally and then call
# `get_or_to` to run it on the remote cluster. Using `get_or_to` allows us to load the existing Module
# by the name `mistral-instruct` if it was already put on the cluster. If we want to update the module each
@@ -168,7 +159,7 @@ def generate(self, messages: list, return_tensors="pt", sequence_length=256):
# Note that we also pass the `env` object to the `get_or_to` method, which will ensure that the environment is
# set up on the remote machine before the module is run.
remote_instruct_model = MistralInstruct().get_or_to(
-    cluster, env=env, name="mistral-instruct"
+    cluster, name="mistral-instruct"
)

# ## Loading and prompting the model
1 change: 0 additions & 1 deletion examples/mistral-with-tgi-ec2/tgi_mistral_ec2.py
@@ -159,7 +159,6 @@ def restart_container(self):
env = rh.env(
name="tgi_env",
reqs=["docker", "openai", "torch", "transformers"],
-working_dir="./",
)

# Finally, we define our module and run it on the remote cluster. We construct it normally and then call
8 changes: 7 additions & 1 deletion runhouse/resources/hardware/on_demand_cluster.py
@@ -148,7 +148,13 @@ def docker_user(self) -> str:
if self._docker_user:
return self._docker_user

-if not self.image_id:
+# TODO detect whether this is a k8s cluster properly, and handle the user setting / SSH properly
+# (e.g. SkyPilot's new KubernetesCommandRunner)
+if (
+    not self.image_id
+    or "docker:" not in self.image_id
+    or self.provider == "kubernetes"
+):
return None

from runhouse.resources.hardware.sky_ssh_runner import get_docker_user
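For readability, here is a minimal sketch of how the updated guard reads in context, reconstructed from the hunk above. The rest of the method is not part of this commit and is elided with comments; only the condition lines are taken verbatim from the diff.

```python
def docker_user(self) -> str:
    if self._docker_user:
        return self._docker_user

    # Skip deriving a container user when the cluster isn't launched from a
    # docker image, or when it runs on Kubernetes (where SSH/user handling
    # differs, e.g. SkyPilot's new KubernetesCommandRunner).
    if (
        not self.image_id
        or "docker:" not in self.image_id
        or self.provider == "kubernetes"
    ):
        return None

    from runhouse.resources.hardware.sky_ssh_runner import get_docker_user

    # ... remainder unchanged by this commit: derives the docker user from
    # the image via get_docker_user and returns it.
```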

0 comments on commit b0cdbfa
