Fix failing synthetic monitors (#535)
* Fix algolia_indexer synthetic monitor

* Fix db_to_sheet

* Fix dbt_duckdb

* Remove dbt_sqlite

This example depends on Meltano, which is itself currently untested,
and it also looks up an NFS.

* Fix mini_dalle_slackbot

* Fix news_summarizer

* Fix dreambooth_app

* Fix instructor

* Fix webscraper

* Fix a bunch of "huggingface" secrets

* Fix db_to_sheet

* Revert changes to environment_name

* Remove unused import for lints

* Fix TGI synmon token

* Fix TEI and TGI-Mixtral

- TEI: Issue with HuggingFace secrets again
- TGI-Mixtral: huggingface/text-generation-inference#1342
ekzhang authored and gongy committed Jan 5, 2024
1 parent c44a177 commit 0ffa1f4
Showing 20 changed files with 51 additions and 228 deletions.
@@ -262,7 +262,7 @@ class AppConfig:
gpu="A100", # finetuning is VRAM hungry, so this should be an A100
volumes=VOLUME_CONFIG,
timeout=3600 * 2, # multiple hours
-    secrets=[Secret.from_name("huggingface")],
+    secrets=[Secret.from_name("huggingface-secret")],
)
# ## Define the training function
# Now, finally, we define the training function itself. This training function does a bunch of preparatory things, but the core of it is the `_exec_subprocess` call to `accelerate launch` that launches the actual Diffusers training script. Depending on which Diffusers script you are using, you will want to modify the script name, and the arguments that are passed to it.
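Note: the recurring change in this commit is renaming the Modal secret from "huggingface" to "huggingface-secret". A minimal sketch of the pattern — not part of the commit, assuming the secret exposes a HUGGINGFACE_TOKEN key and using an illustrative stub name:

import os

from modal import Secret, Stub

stub = Stub("secret-demo")


@stub.function(secrets=[Secret.from_name("huggingface-secret")])
def read_token() -> str:
    token = os.environ["HUGGINGFACE_TOKEN"]  # injected by the attached secret
    return token[:4] + "..."  # avoid returning the full credential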
45 changes: 15 additions & 30 deletions 06_gpu_and_ml/dreambooth/dreambooth_app.py
@@ -33,7 +33,6 @@
from modal import (
Image,
Mount,
-    Secret,
Stub,
Volume,
asgi_app,
@@ -77,7 +76,7 @@
# This is crucial as finetuning runs are separate from the Gradio app we run as a webhook.

volume = Volume.persisted("dreambooth-finetuning-volume")
-MODEL_DIR = Path("/model")
+MODEL_DIR = "/model"

# ## Config
#
@@ -138,21 +137,21 @@ class AppConfig(SharedConfig):
# So we can fetch just a few images, stored on consumer platforms like Imgur or Google Drive
# -- no need for expensive data collection or data engineering.

-IMG_PATH = Path("/img")
-

-def load_images(image_urls):
+def load_images(image_urls: list[str]) -> Path:
import PIL.Image
from smart_open import open

-    os.makedirs(IMG_PATH, exist_ok=True)
+    img_path = Path("/img")
+
+    img_path.mkdir(parents=True, exist_ok=True)
for ii, url in enumerate(image_urls):
with open(url, "rb") as f:
image = PIL.Image.open(f)
-            image.save(IMG_PATH / f"{ii}.png")
+            image.save(img_path / f"{ii}.png")
print("Images loaded.")

-    return IMG_PATH
+    return img_path


# ## Finetuning a text-to-image model
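Note: a hypothetical local call to the rewritten helper above — assumes the `smart_open` and `Pillow` packages are installed and the URLs (placeholders here) are reachable:

urls = [
    "https://example.com/subject-1.jpg",
    "https://example.com/subject-2.jpg",
]
img_dir = load_images(urls)
print(sorted(p.name for p in img_dir.iterdir()))  # expect ['0.png', '1.png']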
@@ -173,34 +172,24 @@ def load_images(image_urls):
#
# The model weights, libraries, and training script are all provided by [🤗 Hugging Face](https://huggingface.co).
#
-# To access the model weights, you'll need a [Hugging Face account](https://huggingface.co/join)
-# and from that account you'll need to accept the model license [here](https://huggingface.co/runwayml/stable-diffusion-v1-5).
-#
-# Lastly, you'll need to create a token from that account and share it with Modal
-# under the name `"huggingface"`. Follow the instructions [here](https://modal.com/secrets).
-#
-# Then, you can kick off a training job with the command
-# `modal run dreambooth_app.py::stub.train`.
+# You can kick off a training job with the command `modal run dreambooth_app.py::stub.train`.
# It should take about ten minutes.
#
-# Tip: if the results you're seeing don't match the prompt too well, and instead produce an image of your subject again, the model has likely overfit. In this case, repeat training with a lower # of max_train_steps. On the other hand, if the results don't look like your subject, you might need to increase # of max_train_steps.
+# Tip: if the results you're seeing don't match the prompt too well, and instead produce an image
+# of your subject again, the model has likely overfit. In this case, repeat training with a lower
+# value of `max_train_steps`. On the other hand, if the results don't look like your subject, you
+# might need to increase `max_train_steps`.


@stub.function(
image=image,
-    gpu="A100", # finetuning is VRAM hungry, so this should be an A100
-    volumes={
-        str(
-            MODEL_DIR
-        ): volume, # fine-tuned model will be stored at `MODEL_DIR`
-    },
+    gpu="A100", # fine-tuning is VRAM-heavy and requires an A100 GPU
+    volumes={MODEL_DIR: volume}, # stores fine-tuned model
timeout=1800, # 30 minutes
-    secrets=[Secret.from_name("huggingface")],
)
def train(instance_example_urls):
import subprocess

-    import huggingface_hub
from accelerate.utils import write_basic_config
from transformers import CLIPTokenizer

@@ -214,10 +203,6 @@ def train(instance_example_urls):
# set up hugging face accelerate library for fast training
write_basic_config(mixed_precision="fp16")

-    # authenticate to hugging face so we can download the model weights
-    hf_key = os.environ["HUGGINGFACE_TOKEN"]
-    huggingface_hub.login(hf_key)
-
# check whether we can access to model repo
try:
CLIPTokenizer.from_pretrained(config.model_name, subfolder="tokenizer")
@@ -283,7 +268,7 @@ def _exec_subprocess(cmd: list[str]):
@stub.cls(
image=image,
gpu="A100",
-    volumes={str(MODEL_DIR): volume},
+    volumes={MODEL_DIR: volume},
)
class Model:
def __enter__(self):
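Note: the dreambooth changes above drop the Path() wrapper because Modal volume mount points are plain strings. A minimal sketch of the pattern with illustrative names (the stub, volume name, and file path are not from the example):

from modal import Stub, Volume

stub = Stub("volume-demo")
volume = Volume.persisted("demo-finetune-volume")
MODEL_DIR = "/model"  # mount point is a plain string


@stub.function(volumes={MODEL_DIR: volume})
def save_checkpoint(data: bytes):
    with open(f"{MODEL_DIR}/checkpoint.bin", "wb") as f:
        f.write(data)
    volume.commit()  # persist writes so later containers can read them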
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/embeddings/instructor.py
@@ -39,7 +39,7 @@ def compare(self, sentences_a, sentences_b):
embeddings_a = self.model.encode(sentences_a)
embeddings_b = self.model.encode(sentences_b)
similarities = cosine_similarity(embeddings_a, embeddings_b)
-        return similarities
+        return similarities.tolist()


@stub.local_entrypoint()
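Note: `cosine_similarity` returns a NumPy array, which does not JSON-serialize; `.tolist()` converts it to plain nested lists, which is presumably why the monitor was failing. A standalone illustration:

import json

import numpy as np

similarities = np.array([[0.91, 0.12], [0.08, 0.87]])
json.dumps(similarities.tolist())  # works: nested plain lists
# json.dumps(similarities) raises TypeError: Object of type ndarray is not JSON serializable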
10 changes: 8 additions & 2 deletions 06_gpu_and_ml/embeddings/text_embeddings_inference.py
@@ -29,7 +29,13 @@


def spawn_server() -> subprocess.Popen:
-    process = subprocess.Popen(["text-embeddings-router"] + LAUNCH_FLAGS)
+    process = subprocess.Popen(
+        ["text-embeddings-router"] + LAUNCH_FLAGS,
+        env={
+            **os.environ,
+            "HUGGING_FACE_HUB_TOKEN": os.environ["HUGGINGFACE_TOKEN"],
+        },
+    )

# Poll until webserver at 127.0.0.1:8000 accepts connections before running inputs.
while True:
@@ -74,7 +80,7 @@ def download_model():


@stub.cls(
-    secret=Secret.from_name("huggingface"),
+    secret=Secret.from_name("huggingface-secret"),
gpu=GPU_CONFIG,
image=tei_image,
# Use up to 20 GPU containers at once.
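Note: both the TEI and TGI fixes forward the token under the variable name the server binary expects. A self-contained sketch of the env-forwarding pattern — `printenv` stands in for the real router binary, and a POSIX system is assumed:

import os
import subprocess

env = {
    **os.environ,  # keep PATH etc. so the child process can start
    "HUGGING_FACE_HUB_TOKEN": os.environ.get("HUGGINGFACE_TOKEN", "dummy"),
}
subprocess.run(["printenv", "HUGGING_FACE_HUB_TOKEN"], env=env, check=True)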
24 changes: 15 additions & 9 deletions 06_gpu_and_ml/text_generation_inference.py
@@ -12,6 +12,8 @@
#
# First we import the components we need from `modal`.

+import os
+import subprocess
from pathlib import Path

from modal import Image, Mount, Secret, Stub, asgi_app, gpu, method
@@ -49,8 +51,6 @@


def download_model():
-    import subprocess
-
subprocess.run(
[
"text-generation-server",
@@ -59,6 +59,10 @@ def download_model():
"--revision",
REVISION,
],
+        env={
+            **os.environ,
+            "HUGGING_FACE_HUB_TOKEN": os.environ["HUGGINGFACE_TOKEN"],
+        },
check=True,
)

@@ -69,21 +73,20 @@
#
# Next we run the download step to pre-populate the image with our model weights.
#
-# For this step to work on a gated model such as LLaMA 2, the HUGGING_FACE_HUB_TOKEN environment
+# For this step to work on a gated model such as LLaMA 2, the HUGGINGFACE_TOKEN environment
# variable must be set ([reference](https://github.com/huggingface/text-generation-inference#using-a-private-or-gated-model)).
#
# After [creating a HuggingFace access token](https://huggingface.co/settings/tokens),
# head to the [secrets page](https://modal.com/secrets) to create a Modal secret.
#
-# The key should be `HUGGING_FACE_HUB_TOKEN` and the value should be your access token.
#
# Finally, we install the `text-generation` client to interface with TGI's Rust webserver over `localhost`.

stub = Stub("example-tgi-" + MODEL_ID.split("/")[-1])

tgi_image = (
Image.from_registry("ghcr.io/huggingface/text-generation-inference:1.0.3")
.dockerfile_commands("ENTRYPOINT []")
-    .run_function(download_model, secret=Secret.from_name("huggingface"))
+    .run_function(download_model, secret=Secret.from_name("huggingface-secret"))
.pip_install("text-generation")
)

@@ -109,7 +112,7 @@ def download_model():


@stub.cls(
-    secret=Secret.from_name("huggingface"),
+    secret=Secret.from_name("huggingface-secret"),
gpu=GPU_CONFIG,
allow_concurrent_inputs=10,
container_idle_timeout=60 * 10,
@@ -119,13 +122,16 @@
class Model:
def __enter__(self):
import socket
-        import subprocess
import time

from text_generation import AsyncClient

        self.launcher = subprocess.Popen(
-            ["text-generation-launcher"] + LAUNCH_FLAGS
+            ["text-generation-launcher"] + LAUNCH_FLAGS,
+            env={
+                **os.environ,
+                "HUGGING_FACE_HUB_TOKEN": os.environ["HUGGINGFACE_TOKEN"],
+            },
)
self.client = AsyncClient("http://127.0.0.1:8000", timeout=60)
self.template = """<s>[INST] <<SYS>>
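Note: the `socket` and `time` imports above support polling until the launcher accepts connections on 127.0.0.1:8000, as in the TEI example. A self-contained sketch of such a readiness poll, with illustrative defaults:

import socket
import time


def wait_for_port(host="127.0.0.1", port=8000, timeout=60.0) -> None:
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            socket.create_connection((host, port), timeout=1).close()
            return  # server is accepting connections
        except OSError:
            time.sleep(0.5)  # not up yet; retry shortly
    raise TimeoutError(f"no server on {host}:{port} after {timeout}s")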
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/tgi_mixtral.py
@@ -70,7 +70,7 @@ def download_model():
# Finally, we install the `text-generation` client to interface with TGI's Rust webserver over `localhost`.

tgi_image = (
-    Image.from_registry("ghcr.io/huggingface/text-generation-inference:1.3.1")
+    Image.from_registry("ghcr.io/huggingface/text-generation-inference:1.3.3")
.dockerfile_commands("ENTRYPOINT []")
.run_function(download_model, timeout=60 * 20)
.pip_install("text-generation")
4 changes: 2 additions & 2 deletions 06_gpu_and_ml/vllm_inference.py
@@ -67,7 +67,7 @@ def download_model_to_folder():
.env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
.run_function(
download_model_to_folder,
-        secret=Secret.from_name("huggingface"),
+        secret=Secret.from_name("huggingface-secret"),
timeout=60 * 20,
)
)
@@ -82,7 +82,7 @@ def download_model_to_folder():
# on the GPU for each subsequent invocation of the function.
#
# The `vLLM` library allows the code to remain quite clean.
-@stub.cls(gpu="A100", secret=Secret.from_name("huggingface"))
+@stub.cls(gpu="A100", secret=Secret.from_name("huggingface-secret"))
class Model:
def __enter__(self):
from vllm import LLM
3 changes: 2 additions & 1 deletion 10_integrations/algolia_indexer.py
@@ -73,7 +73,8 @@


@stub.function(
-    image=algolia_image, secrets=[Secret.from_name("algolia-secret")]
+    image=algolia_image,
+    secrets=[Secret.from_name("algolia-secret")],
)
def crawl():
# Installed with a 3.6 venv; Python 3.6 is unsupported by Modal, so use a subprocess instead.
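Note: per the comment above, the crawler runs from a pinned Python 3.6 virtualenv via a subprocess. A hedged sketch of that shape — the interpreter path and module name are hypothetical:

import subprocess

subprocess.run(
    ["/root/venv/bin/python", "-m", "crawler"],  # hypothetical venv + module
    check=True,
)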
92 changes: 0 additions & 92 deletions 10_integrations/dbt/dbt_sqlite.py

This file was deleted.

4 changes: 0 additions & 4 deletions 10_integrations/dbt/sample_proj_sqlite/.gitignore

This file was deleted.

26 changes: 0 additions & 26 deletions 10_integrations/dbt/sample_proj_sqlite/dbt_project.yml

This file was deleted.

