
Add Ocular Models Server To Host Embedding, ReRanking, FineTuned Models #87

Merged · 3 commits · May 16, 2024
11 changes: 10 additions & 1 deletion docker-compose.dev.yml
@@ -1,5 +1,13 @@
version: "3"
services:
  ocular-models-server:
    container_name: ocular-models-server
    build:
      context: .
      dockerfile: packages/ocular-models-server/Dockerfile.models
    ports:
      - 8000:8000

  qdrant:
    image: qdrant/qdrant:latest
    restart: always
@@ -67,8 +75,9 @@ services:
    environment:
      - DATABASE_URL=postgresql://ocular:ocular@ocular-db:5432/ocular
      - DATABASE_NAME=ocular
      - PUPPETEER_SKIP_DOWNLOAD=true
    command: npm run typeorm migration:run

  pgadmin:
    image: dpage/pgadmin4
    container_name: pgadmin4_container
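With the dev stack running (for example `docker compose -f docker-compose.dev.yml up ocular-models-server`), a minimal Python smoke test against the new service could look like the sketch below. The request shape follows the Texts model in server.py further down; the host and port assume the compose mapping above.

import requests

# POST a batch of texts to the /embed endpoint added in this PR
resp = requests.post(
    "http://localhost:8000/embed",
    json={"texts": ["hello world", "ocular models server"]},
)
resp.raise_for_status()

# The server returns one mean-pooled vector per input text;
# e5-base-v2 vectors are 768-dimensional.
embeddings = resp.json()
print(len(embeddings), len(embeddings[0]))  # 2 768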
5 changes: 5 additions & 0 deletions packages/ocular-models-server/.gitignore
@@ -0,0 +1,5 @@
env.local
.env.dev
.env.local
.env.prod
__pycache__/
46 changes: 46 additions & 0 deletions packages/ocular-models-server/Dockerfile.models
@@ -0,0 +1,46 @@
# Use Ubuntu 22.04 as the base image
FROM ubuntu:22.04

# Install Python 3.
# Skip recommended packages and clean up after the install to keep
# the image as small as possible.
RUN apt-get update && \
    apt-get install --no-install-recommends -y python3=3.10.6-1~22.04 && \
    apt-get install --no-install-recommends -y python3-pip && \
    apt-get install --no-install-recommends -y python3-venv=3.10.6-1~22.04 && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*


# Install Node.js
RUN apt-get update && \
    apt-get install --no-install-recommends -y nodejs npm && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*



# Set the working directory within the container
WORKDIR /app

# Download and Install The Hugging Face Models
# Copy necessary files to the container
COPY packages/ocular-models-server/requirements.txt .
COPY packages/ocular-models-server/download_models.py .
COPY packages/ocular-models-server/server.py .

# Create a virtual environment in the container
RUN python3 -m venv .venv

# Activate the virtual environment
ENV PATH="/app/.venv/bin:$PATH"

# Install Python dependencies from the requirements file
RUN pip install --no-cache-dir -r requirements.txt && \
    # Get the models from Hugging Face to bake into the container
    python3 download_models.py

EXPOSE 8000

# Run the FastAPI model server with uvicorn
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--reload"]
18 changes: 18 additions & 0 deletions packages/ocular-models-server/download_models.py
@@ -0,0 +1,18 @@
from transformers import AutoModel, AutoTokenizer
import os

def download_model(model_path, model_name):
    """Download a Hugging Face model and tokenizer to the specified directory"""
    # Check if the directory already exists
    if not os.path.exists(model_path):
        # Create the directory
        os.makedirs(model_path)

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.save_pretrained(model_path)
    # Fetch the model weights too, so the server does not download them at runtime
    model = AutoModel.from_pretrained(model_name)
    model.save_pretrained(model_path)

# Download An Embedding Model
download_model('models/intfloat/e5-base-v2', 'intfloat/e5-base-v2')
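The PR title also mentions reranking models; the same helper could bake one in as well. A hedged sketch, using a public cross-encoder checkpoint as an illustrative choice: it is not referenced anywhere in this PR, and a real reranker would be loaded with AutoModelForSequenceClassification rather than AutoModel so it keeps its scoring head.

# Hypothetical example; checkpoint name chosen for illustration only
download_model(
    'models/cross-encoder/ms-marco-MiniLM-L-6-v2',
    'cross-encoder/ms-marco-MiniLM-L-6-v2',
)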
4 changes: 4 additions & 0 deletions packages/ocular-models-server/requirements.txt
@@ -0,0 +1,4 @@
transformers==4.30.2
torch==2.0.1
fastapi==0.70.0
uvicorn==0.15.0
46 changes: 46 additions & 0 deletions packages/ocular-models-server/server.py
@@ -0,0 +1,46 @@

import torch
from torch.nn.functional import normalize
from transformers import AutoModel, AutoTokenizer
from fastapi import FastAPI
from typing import List
from pydantic import BaseModel

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device}")

model_id = "intfloat/e5-base-v2"

# initialize tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id).to(device)
model.eval()

def embed(docs: List[str]) -> List[List[float]]:
    print(docs)
    # tokenize
    tokens = tokenizer(
        docs, padding=True, max_length=512, truncation=True, return_tensors="pt"
    ).to(device)
    with torch.no_grad():
        # process with model for token-level embeddings
        out = model(**tokens)
        # mask padding tokens
        last_hidden = out.last_hidden_state.masked_fill(
            ~tokens["attention_mask"][..., None].bool(), 0.0
        )
        # create mean pooled embeddings
        doc_embeds = last_hidden.sum(dim=1) / \
            tokens["attention_mask"].sum(dim=1)[..., None]
    return doc_embeds.cpu().numpy().tolist()

app = FastAPI()

class Texts(BaseModel):
    texts: List[str] = []

@app.post("/embed")
async def embed_api(texts: Texts):
    prefixed_texts = [f"passage: {d}" for d in texts.texts]
    texts_embeds = embed(prefixed_texts)
    return texts_embeds
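One caveat worth noting: e5 models pair "passage: "-prefixed documents with "query: "-prefixed queries at search time, and this endpoint hardcodes the passage prefix, so query-side embedding would presumably need a separate route. Within what the endpoint supports today, a small similarity sketch (numpy and a running service on localhost:8000 are assumptions):

import numpy as np
import requests

# Embed two related sentences; the server prefixes each with "passage: "
vecs = np.array(
    requests.post(
        "http://localhost:8000/embed",
        json={"texts": ["the cat sat on the mat", "a cat is sitting on a mat"]},
    ).json()
)
a, b = vecs
# Cosine similarity between the two mean-pooled embeddings
print(float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b))))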
2 changes: 2 additions & 0 deletions packages/ocular/Dockerfile.dev
@@ -1,6 +1,8 @@

FROM node:18-alpine

RUN apk update && \
    apk add chromium


WORKDIR /usr/src/app