Override onnx clip loading (#13800)
* Set caching options for hardware providers

* Always use CPU for searching

* Use new install strategy to remove onnxruntime and then install post wheels
NickM-27 authored Sep 17, 2024
1 parent 90d7fc6 commit 2362d0e
Showing 6 changed files with 82 additions and 7 deletions.
12 changes: 10 additions & 2 deletions docker/main/Dockerfile
@@ -170,8 +170,8 @@ RUN /build_pysqlite3.sh
COPY docker/main/requirements-wheels.txt /requirements-wheels.txt
RUN pip3 wheel --wheel-dir=/wheels -r /requirements-wheels.txt

-COPY docker/main/requirements-wheels-nodeps.txt /requirements-wheels-nodeps.txt
-RUN pip3 wheel --no-deps --wheel-dir=/wheels -r /requirements-wheels-nodeps.txt
+COPY docker/main/requirements-wheels-post.txt /requirements-wheels-post.txt
+RUN pip3 wheel --no-deps --wheel-dir=/wheels-post -r /requirements-wheels-post.txt


# Collect deps in a single layer
@@ -215,6 +215,14 @@ RUN --mount=type=bind,from=wheels,source=/wheels,target=/deps/wheels \
    python3 -m pip install --upgrade pip && \
    pip3 install -U /deps/wheels/*.whl

+# We have to uninstall this dependency specifically
+# as it will break onnxruntime-openvino
+RUN pip3 uninstall -y onnxruntime
+
+RUN --mount=type=bind,from=wheels,source=/wheels-post,target=/deps/wheels \
+    python3 -m pip install --upgrade pip && \
+    pip3 install -U /deps/wheels/*.whl

COPY --from=deps-rootfs / /

RUN ldconfig
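Note on the uninstall/reinstall sequence above: pip treats onnxruntime and onnxruntime-openvino as separate distributions that both provide the onnxruntime Python package, so whichever installs last overwrites the other. Uninstalling the plain package first keeps the OpenVINO build intact when the post wheels go in. A minimal sanity check to run inside the built image (a sketch; the exact provider list depends on the hardware):

    import onnxruntime as ort

    # The OpenVINO build exposes OpenVINOExecutionProvider; if the plain
    # CPU wheel had clobbered it, only CPUExecutionProvider would appear.
    print(ort.__version__)
    print(ort.get_available_providers())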
1 change: 0 additions & 1 deletion docker/main/requirements-wheels-nodeps.txt

This file was deleted.

3 changes: 3 additions & 0 deletions docker/main/requirements-wheels-post.txt
@@ -0,0 +1,3 @@
+# ONNX
+onnxruntime-openvino == 1.18.* ; platform_machine == 'x86_64'
+onnxruntime == 1.18.* ; platform_machine == 'aarch64'
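The trailing ; platform_machine == '...' clauses are standard PEP 508 environment markers, so this one file installs the OpenVINO build on x86_64 and the plain build on aarch64. A quick sketch of the value pip evaluates the marker against:

    import platform

    # PEP 508 defines platform_machine as platform.machine()
    print(platform.machine())  # 'x86_64' on Intel/AMD hosts, 'aarch64' on ARM64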
3 changes: 1 addition & 2 deletions docker/main/requirements-wheels.txt
@@ -30,10 +30,9 @@ ws4py == 0.5.*
unidecode == 1.3.*
# OpenVino & ONNX
openvino == 2024.1.*
-onnxruntime-openvino == 1.18.* ; platform_machine == 'x86_64'
-onnxruntime == 1.18.* ; platform_machine == 'aarch64'
# Embeddings
chromadb == 0.5.0
+onnx_clip == 4.0.*
# Generative AI
google-generativeai == 0.6.*
ollama == 0.2.*
5 changes: 4 additions & 1 deletion frigate/embeddings/embeddings.py
Expand Up @@ -85,7 +85,10 @@ def thumbnail(self) -> Collection:
    @property
    def description(self) -> Collection:
        return self.client.get_or_create_collection(
-            name="event_description", embedding_function=MiniLMEmbedding()
+            name="event_description",
+            embedding_function=MiniLMEmbedding(
+                preferred_providers=["CPUExecutionProvider"]
+            ),
        )

    def reindex(self) -> None:
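This change pins description embeddings to the CPU, matching the "Always use CPU for searching" commit message, presumably so text queries never contend with detection for the accelerator. preferred_providers is Frigate's own wrapper argument; stripped of the wrapper, the equivalent bare onnxruntime call would be a fixed provider list (the model path here is hypothetical):

    import onnxruntime as ort

    # Force CPU inference even when accelerated providers are available.
    session = ort.InferenceSession(
        "/config/model_cache/minilm/model.onnx",  # hypothetical path
        providers=["CPUExecutionProvider"],
    )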
65 changes: 64 additions & 1 deletion frigate/embeddings/functions/clip.py
@@ -1,9 +1,13 @@
"""CLIP Embeddings for Frigate."""

+import errno
import logging
import os
+from pathlib import Path
from typing import Tuple, Union

+import onnxruntime as ort
+import requests
from chromadb import EmbeddingFunction, Embeddings
from chromadb.api.types import (
    Documents,
@@ -39,10 +43,69 @@ def _load_models(
        models = []
        for model_file in [IMAGE_MODEL_FILE, TEXT_MODEL_FILE]:
            path = os.path.join(MODEL_CACHE_DIR, "clip", model_file)
-            models.append(OnnxClip._load_model(path, silent))
+            models.append(Clip._load_model(path, silent))

        return models[0], models[1]

+    @staticmethod
+    def _load_model(path: str, silent: bool):
+        providers = ort.get_available_providers()
+        options = []
+
+        for provider in providers:
+            if provider == "TensorrtExecutionProvider":
+                options.append(
+                    {
+                        "trt_timing_cache_enable": True,
+                        "trt_timing_cache_path": "/config/model_cache/tensorrt/ort",
+                        "trt_engine_cache_enable": True,
+                        "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines",
+                    }
+                )
+            elif provider == "OpenVINOExecutionProvider":
+                options.append({"cache_dir": "/config/model_cache/openvino/ort"})
+            else:
+                options.append({})
+
+        try:
+            if os.path.exists(path):
+                return ort.InferenceSession(
+                    path, providers=providers, provider_options=options
+                )
+            else:
+                raise FileNotFoundError(
+                    errno.ENOENT,
+                    os.strerror(errno.ENOENT),
+                    path,
+                )
+        except Exception:
+            s3_url = f"https://lakera-clip.s3.eu-west-1.amazonaws.com/{os.path.basename(path)}"
+            if not silent:
+                logging.info(
+                    f"The model file ({path}) doesn't exist "
+                    f"or it is invalid. Downloading it from the public S3 "
+                    f"bucket: {s3_url}."  # noqa: E501
+                )
+
+            # Download from S3
+            # Saving to a temporary file first to avoid corrupting the file
+            temporary_filename = Path(path).with_name(os.path.basename(path) + ".part")
+
+            # Create any missing directories in the path
+            temporary_filename.parent.mkdir(parents=True, exist_ok=True)
+
+            with requests.get(s3_url, stream=True) as r:
+                r.raise_for_status()
+                with open(temporary_filename, "wb") as f:
+                    for chunk in r.iter_content(chunk_size=8192):
+                        f.write(chunk)
+                        f.flush()
+            # Finally move the temporary file to the correct location
+            temporary_filename.rename(path)
+            return ort.InferenceSession(
+                path, providers=providers, provider_options=options
+            )


class ClipEmbedding(EmbeddingFunction):
    """Embedding function for CLIP model used in Chroma."""