diff --git a/docker/main/Dockerfile b/docker/main/Dockerfile index 4d238f29b5..3cd73cf95f 100644 --- a/docker/main/Dockerfile +++ b/docker/main/Dockerfile @@ -170,8 +170,8 @@ RUN /build_pysqlite3.sh COPY docker/main/requirements-wheels.txt /requirements-wheels.txt RUN pip3 wheel --wheel-dir=/wheels -r /requirements-wheels.txt -COPY docker/main/requirements-wheels-nodeps.txt /requirements-wheels-nodeps.txt -RUN pip3 wheel --no-deps --wheel-dir=/wheels -r /requirements-wheels-nodeps.txt +COPY docker/main/requirements-wheels-post.txt /requirements-wheels-post.txt +RUN pip3 wheel --no-deps --wheel-dir=/wheels-post -r /requirements-wheels-post.txt # Collect deps in a single layer @@ -215,6 +215,14 @@ RUN --mount=type=bind,from=wheels,source=/wheels,target=/deps/wheels \ python3 -m pip install --upgrade pip && \ pip3 install -U /deps/wheels/*.whl +# We have to uninstall this dependency specifically +# as it will break onnxruntime-openvino +RUN pip3 uninstall -y onnxruntime + +RUN --mount=type=bind,from=wheels,source=/wheels-post,target=/deps/wheels \ + python3 -m pip install --upgrade pip && \ + pip3 install -U /deps/wheels/*.whl + COPY --from=deps-rootfs / / RUN ldconfig diff --git a/docker/main/requirements-wheels-nodeps.txt b/docker/main/requirements-wheels-nodeps.txt deleted file mode 100644 index 84eac63c26..0000000000 --- a/docker/main/requirements-wheels-nodeps.txt +++ /dev/null @@ -1 +0,0 @@ -onnx_clip == 4.0.* diff --git a/docker/main/requirements-wheels-post.txt b/docker/main/requirements-wheels-post.txt new file mode 100644 index 0000000000..c4ed338444 --- /dev/null +++ b/docker/main/requirements-wheels-post.txt @@ -0,0 +1,3 @@ +# ONNX +onnxruntime-openvino == 1.18.* ; platform_machine == 'x86_64' +onnxruntime == 1.18.* ; platform_machine == 'aarch64' \ No newline at end of file diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt index 83265f0b78..639d4b3c84 100644 --- a/docker/main/requirements-wheels.txt +++ 
b/docker/main/requirements-wheels.txt @@ -30,10 +30,9 @@ ws4py == 0.5.* unidecode == 1.3.* # OpenVino & ONNX openvino == 2024.1.* -onnxruntime-openvino == 1.18.* ; platform_machine == 'x86_64' -onnxruntime == 1.18.* ; platform_machine == 'aarch64' # Embeddings chromadb == 0.5.0 +onnx_clip == 4.0.* # Generative AI google-generativeai == 0.6.* ollama == 0.2.* diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index 998738d50f..8179de15ec 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -85,7 +85,10 @@ def thumbnail(self) -> Collection: @property def description(self) -> Collection: return self.client.get_or_create_collection( - name="event_description", embedding_function=MiniLMEmbedding() + name="event_description", + embedding_function=MiniLMEmbedding( + preferred_providers=["CPUExecutionProvider"] + ), ) def reindex(self) -> None: diff --git a/frigate/embeddings/functions/clip.py b/frigate/embeddings/functions/clip.py index 867938aff4..6e44033dff 100644 --- a/frigate/embeddings/functions/clip.py +++ b/frigate/embeddings/functions/clip.py @@ -1,9 +1,13 @@ """CLIP Embeddings for Frigate.""" +import errno +import logging import os +from pathlib import Path from typing import Tuple, Union import onnxruntime as ort +import requests from chromadb import EmbeddingFunction, Embeddings from chromadb.api.types import ( Documents, @@ -39,10 +43,69 @@ def _load_models( models = [] for model_file in [IMAGE_MODEL_FILE, TEXT_MODEL_FILE]: path = os.path.join(MODEL_CACHE_DIR, "clip", model_file) - models.append(OnnxClip._load_model(path, silent)) + models.append(Clip._load_model(path, silent)) return models[0], models[1] + @staticmethod + def _load_model(path: str, silent: bool): + providers = ort.get_available_providers() + options = [] + + for provider in providers: + if provider == "TensorrtExecutionProvider": + options.append( + { + "trt_timing_cache_enable": True, + "trt_timing_cache_path": 
"/config/model_cache/tensorrt/ort", + "trt_engine_cache_enable": True, + "trt_engine_cache_path": "/config/model_cache/tensorrt/ort/trt-engines", + } + ) + elif provider == "OpenVINOExecutionProvider": + options.append({"cache_dir": "/config/model_cache/openvino/ort"}) + else: + options.append({}) + + try: + if os.path.exists(path): + return ort.InferenceSession( + path, providers=providers, provider_options=options + ) + else: + raise FileNotFoundError( + errno.ENOENT, + os.strerror(errno.ENOENT), + path, + ) + except Exception: + s3_url = f"https://lakera-clip.s3.eu-west-1.amazonaws.com/{os.path.basename(path)}" + if not silent: + logging.info( + f"The model file ({path}) doesn't exist " + f"or it is invalid. Downloading it from the public S3 " + f"bucket: {s3_url}." # noqa: E501 + ) + + # Download from S3 + # Saving to a temporary file first to avoid corrupting the file + temporary_filename = Path(path).with_name(os.path.basename(path) + ".part") + + # Create any missing directories in the path + temporary_filename.parent.mkdir(parents=True, exist_ok=True) + + with requests.get(s3_url, stream=True) as r: + r.raise_for_status() + with open(temporary_filename, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + f.flush() + # Finally move the temporary file to the correct location + temporary_filename.rename(path) + return ort.InferenceSession( + path, providers=providers, provider_options=options + ) + class ClipEmbedding(EmbeddingFunction): """Embedding function for CLIP model used in Chroma."""