diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index 9f530a6d..3a7a1a99 100644
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -1,5 +1,13 @@
 version: "3"
 services:
+  ocular-models-server:
+    container_name: ocular-models-server
+    build:
+      context: .
+      dockerfile: packages/ocular-models-server/Dockerfile.models
+    ports:
+      - 8000:8000
+
   qdrant:
     image: qdrant/qdrant:latest
     restart: always
@@ -67,8 +75,9 @@ services:
     environment:
       - DATABASE_URL=postgresql://ocular:ocular@ocular-db:5432/ocular
       - DATABASE_NAME=ocular
+      - PUPPETEER_SKIP_DOWNLOAD=true
     command: npm run typeorm migration:run
-
+
   pgadmin:
     image: dpage/pgadmin4
     container_name: pgadmin4_container
diff --git a/packages/ocular-models-server/.gitignore b/packages/ocular-models-server/.gitignore
new file mode 100644
index 00000000..45c04a9e
--- /dev/null
+++ b/packages/ocular-models-server/.gitignore
@@ -0,0 +1,5 @@
+env.local
+.env.dev
+.env.local
+.env.prod
+__pycache__/
\ No newline at end of file
diff --git a/packages/ocular-models-server/Dockerfile.models b/packages/ocular-models-server/Dockerfile.models
new file mode 100644
index 00000000..997d535e
--- /dev/null
+++ b/packages/ocular-models-server/Dockerfile.models
@@ -0,0 +1,43 @@
+# Use Ubuntu 22.04 as the base image
+FROM ubuntu:22.04
+
+# Install Python 3
+# Make sure not to install recommended packages and to clean up after
+# the install to minimize the size of the container as much as possible.
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y python3=3.10.6-1~22.04 && \
+    apt-get install --no-install-recommends -y python3-pip && \
+    apt-get install --no-install-recommends -y python3-venv=3.10.6-1~22.04 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install Node.js
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y nodejs npm && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Set the working directory within the container
+WORKDIR /app
+
+# Download and install the Hugging Face models
+# Copy the necessary files to the container
+COPY packages/ocular-models-server/requirements.txt .
+COPY packages/ocular-models-server/download_models.py .
+COPY packages/ocular-models-server/server.py .
+
+# Create a virtual environment in the container
+RUN python3 -m venv .venv
+
+# Activate the virtual environment
+ENV PATH="/app/.venv/bin:$PATH"
+
+# Install Python dependencies from the requirements file
+RUN pip install --no-cache-dir -r requirements.txt && \
+    # Download the models from Hugging Face to bake them into the container
+    python3 download_models.py
+
+EXPOSE 8000
+
+# Run the Uvicorn server for the models
+CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--reload"]
\ No newline at end of file
diff --git a/packages/ocular-models-server/download_models.py b/packages/ocular-models-server/download_models.py
new file mode 100644
index 00000000..c29dcbab
--- /dev/null
+++ b/packages/ocular-models-server/download_models.py
@@ -0,0 +1,18 @@
+from transformers import AutoModel, AutoTokenizer
+import os
+
+def download_model(model_path, model_name):
+    """Download a Hugging Face model and tokenizer to the specified directory"""
+    # Check if the directory already exists
+    if not os.path.exists(model_path):
+        # Create the directory
+        os.makedirs(model_path)
+
+    # Save both the tokenizer and the model weights so they are baked into the image
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    tokenizer.save_pretrained(model_path)
+    model = AutoModel.from_pretrained(model_name)
+    model.save_pretrained(model_path)
+
+# Download An Embedding Model
+download_model('models/intfloat/e5-base-v2', 'intfloat/e5-base-v2')
\ No newline at end of file
diff --git a/packages/ocular-models-server/requirements.txt b/packages/ocular-models-server/requirements.txt
new file mode 100644
index 00000000..ba7945f0
--- /dev/null
+++ b/packages/ocular-models-server/requirements.txt
@@ -0,0 +1,4 @@
+transformers==4.30.2
+torch==2.0.1
+fastapi==0.70.0
+uvicorn==0.15.0
\ No newline at end of file
diff --git a/packages/ocular-models-server/server.py b/packages/ocular-models-server/server.py
new file mode 100644
index 00000000..fc50bb4b
--- /dev/null
+++ b/packages/ocular-models-server/server.py
@@ -0,0 +1,44 @@
+import torch
+from transformers import AutoModel, AutoTokenizer
+from fastapi import FastAPI
+from typing import List
+from pydantic import BaseModel
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using {device}")
+
+model_id = "intfloat/e5-base-v2"
+
+# Initialize the tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModel.from_pretrained(model_id).to(device)
+model.eval()
+
+def embed(docs: List[str]) -> List[List[float]]:
+    # Tokenize the input documents
+    tokens = tokenizer(
+        docs, padding=True, max_length=512, truncation=True, return_tensors="pt"
+    ).to(device)
+    with torch.no_grad():
+        # Process with the model to get token-level embeddings
+        out = model(**tokens)
+    # Zero out the padding tokens
+    last_hidden = out.last_hidden_state.masked_fill(
+        ~tokens["attention_mask"][..., None].bool(), 0.0
+    )
+    # Create mean-pooled embeddings over the non-padding tokens
+    doc_embeds = last_hidden.sum(dim=1) / \
+        tokens["attention_mask"].sum(dim=1)[..., None]
+    return doc_embeds.cpu().numpy().tolist()
+
+app = FastAPI()
+
+class Texts(BaseModel):
+    texts: List[str] = []
+
+@app.post("/embed")
+async def embed_api(texts: Texts):
+    # e5 models expect a "passage: " prefix on documents being embedded
+    prefixed_texts = [f"passage: {d}" for d in texts.texts]
+    texts_embeds = embed(prefixed_texts)
+    return texts_embeds
\ No newline at end of file
diff --git a/packages/ocular/Dockerfile.dev b/packages/ocular/Dockerfile.dev
index 915e6cb0..b2ef8292 100644
--- a/packages/ocular/Dockerfile.dev
+++ b/packages/ocular/Dockerfile.dev
@@ -1,6 +1,8 @@
 FROM node:18-alpine
+RUN apk update && \
+    apk add chromium
 WORKDIR /usr/src/app
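For a quick smoke test of the new service, something like the following should work once the stack is up (e.g. `docker compose -f docker-compose.dev.yml up --build ocular-models-server`). This is a minimal sketch, not part of the PR: the route and port come from `server.py` and `docker-compose.dev.yml` above, and the dimension check assumes e5-base-v2's BERT-base hidden size of 768.

```python
import requests

# Minimal smoke test for the /embed endpoint added in this PR.
# Assumes the ocular-models-server container is running and mapped
# to localhost:8000 as in docker-compose.dev.yml.
resp = requests.post(
    "http://localhost:8000/embed",
    json={"texts": ["hello world", "ocular models server"]},
)
resp.raise_for_status()

embeddings = resp.json()  # one vector per input text
assert len(embeddings) == 2
assert len(embeddings[0]) == 768  # assumed e5-base-v2 hidden size
print("ok:", len(embeddings), "vectors of dim", len(embeddings[0]))
```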