Commit: bring back rag
jokroese committed Oct 1, 2024
1 parent c9be963 commit 3b0ca92
Showing 9 changed files with 208 additions and 19 deletions.
3 changes: 2 additions & 1 deletion .env.example
@@ -1 +1,2 @@
-OLLAMA_MODEL=qwen2.5:0.5b
+OLLAMA_MODEL=qwen2.5:0.5b
+OPENAI_API_KEY="your-key"
175 changes: 174 additions & 1 deletion api/poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions api/pyproject.toml
@@ -14,6 +14,8 @@ langchain-core = ">=0.3,<0.4"
 unstructured = {extras = ["pdf"], version = "^0.15.13"}
 langchain-ollama = "^0.2.0"
 aiosqlite = "^0.20.0"
+langchain-text-splitters = "^0.3.0"
+langchain-openai = "^0.2.1"
 
 
 [build-system]
9 changes: 9 additions & 0 deletions api/src/config.py
@@ -10,3 +10,12 @@ def get_model_name():
         )
         model_name = "qwen2.5:0.5b"
     return model_name
+
+
+def get_openai_api_key():
+    openai_api_key = os.getenv("OPENAI_API_KEY")
+    if not openai_api_key:
+        raise ValueError(
+            "The OPENAI_API_KEY environment variable is not defined. Please set it."
+        )
+    return openai_api_key
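The new helper mirrors get_model_name: read the variable, fail fast with an explicit error instead of letting the OpenAI client fail later with a vaguer auth message. A minimal self-contained sketch of the pattern (the __main__ usage below is illustrative, not part of the commit):

import os


def get_openai_api_key() -> str:
    # Fail fast at startup instead of deep inside the first embedding call
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        raise ValueError(
            "The OPENAI_API_KEY environment variable is not defined. Please set it."
        )
    return openai_api_key


if __name__ == "__main__":
    # Hypothetical smoke test: validate the key once before wiring up the chain
    key = get_openai_api_key()
    print(f"OPENAI_API_KEY loaded ({len(key)} characters)")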
20 changes: 13 additions & 7 deletions api/src/llm.py
@@ -3,32 +3,38 @@
 from langchain_core.output_parsers import StrOutputParser
 
 from langchain_core.vectorstores import InMemoryVectorStore
-from langchain_ollama import OllamaEmbeddings
+
+# from langchain_ollama import OllamaEmbeddings
+from langchain_openai import OpenAIEmbeddings
 from langchain_community.document_loaders import DirectoryLoader
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_core.runnables import RunnablePassthrough
+from config import get_openai_api_key
 
 
 def setup_langchain(model_name: str):
     # Load and split documents
+    print("load docs")
     loader = DirectoryLoader("./data/texts", show_progress=True)
     docs = loader.load()
 
+    print("splitting")
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
     split_docs = text_splitter.split_documents(docs)
 
     # Create embeddings and store in SQLite-VSS
+    print("create embeddings")
+    openai_api_key = get_openai_api_key()
+    embedding_function = OpenAIEmbeddings(api_key=openai_api_key)
 
     vector_store = InMemoryVectorStore.from_texts(
-        texts=[doc.page_content for doc in split_docs],
-        embedding=OllamaEmbeddings(model="nomic-embed-text:v1.5"),
+        texts=[doc.page_content for doc in split_docs], embedding=embedding_function
     )
 
     # Create a retriever to search relevant chunks
+    print("create retriever")
     retriever = vector_store.as_retriever(
         search_type="similarity", search_kwargs={"k": 6}
     )
 
     # Define prompt template with retrieved context and user question
+    print("define prompt")
     system_prompt = (
         "You are Baby AI. You are talking to users to learn about the world."
         "Use the following pieces of retrieved context to generate your response."
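The hunk is truncated here, so the rest of setup_langchain (prompt assembly and chain construction) is not shown. Given the imports above — RunnablePassthrough and StrOutputParser — a typical completion of this kind of LangChain RAG setup looks roughly like the sketch below. This is an assumption about the unshown code, not the commit's contents; format_docs, build_chain, and the ChatOllama wiring are guesses:

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_ollama import ChatOllama


def build_chain(retriever, model_name: str, system_prompt: str):
    # Hypothetical helper: join retrieved chunks into one context string
    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

    prompt = ChatPromptTemplate.from_messages(
        [("system", system_prompt + "\n\nContext: {context}"), ("human", "{question}")]
    )
    llm = ChatOllama(model=model_name)  # chat generation presumably stays on Ollama

    # LCEL pipeline: retrieve on the raw input string, then prompt -> model -> text
    return (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

A chain built this way takes a bare string as input, which is consistent with the +page.svelte change further down, where remoteRunnable.stream({ text: message }) becomes remoteRunnable.stream(message).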
4 changes: 3 additions & 1 deletion docker-compose.yml
@@ -6,13 +6,15 @@ services:
     volumes:
       - ./api:/app
     depends_on:
-      - ollama
+      ollama:
+        condition: service_healthy
     networks:
       - app-network
     environment:
       - OLLAMA_HOST=ollama
       - OLLAMA_PORT=11434
       - OLLAMA_MODEL=${OLLAMA_MODEL}
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
     command: poetry run python src/main.py
 
   web:
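One thing to watch: depends_on with condition: service_healthy only gates startup if the ollama service defines a healthcheck, and that service's definition is outside this hunk. If it doesn't have one yet, something like the sketch below would be needed — the build line and probe are assumptions, not shown in the commit:

  ollama:
    build: ./ollama  # assumed, since the repo ships ollama/start-ollama.sh
    healthcheck:
      # "ollama list" only succeeds once the server answers on its port
      test: ["CMD", "ollama", "list"]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 15s
    networks:
      - app-network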
6 changes: 4 additions & 2 deletions ollama/start-ollama.sh
@@ -3,10 +3,12 @@
 # Start the Ollama server in the background
 ollama serve &
 
 # Wait a few seconds for the server to start (optional but ensures readiness)
 sleep 5
 
-# Pull the model after the server has started
+# Pull the model
 ollama pull $OLLAMA_MODEL
 
+# Pull embedding model
+ollama pull "nomic-embed-text:v1.5"
+
 wait
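The fixed sleep 5 is a race: on a slow machine the server may not be up when the pulls run, despite the comment's claim that it "ensures readiness". A more robust alternative is to poll until the server responds — a suggestion, not part of the commit:

# Poll instead of sleeping a fixed interval; "ollama list" fails until the server is up
until ollama list > /dev/null 2>&1; do
  echo "waiting for ollama server..."
  sleep 1
done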
4 changes: 0 additions & 4 deletions web/Dockerfile
@@ -8,11 +8,7 @@ RUN npm run build
 
 # Step 2: Serve with nginx
 FROM nginx:alpine
-# Install curl for the health check
-RUN apk --no-cache add curl
 COPY --from=builder /app/build /usr/share/nginx/html
 EXPOSE 80
 
-HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 CMD curl -f http://localhost:80/ || exit 1
-
 CMD ["nginx", "-g", "daemon off;"]
4 changes: 1 addition & 3 deletions web/src/routes/+page.svelte
@@ -41,9 +41,7 @@
   async function fetchLLMResponse(message: string) {
     try {
-      const stream = await remoteRunnable.stream({
-        text: message
-      });
+      const stream = await remoteRunnable.stream(message);
       for await (const chunk of stream) {
         answer += chunk;
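The input changes from an object ({ text: message }) to a bare string, matching a server-side chain whose first step consumes the raw question (see the RunnablePassthrough sketch above). For context, a self-contained sketch of the call site — the import path and URL are assumptions, since the commit shows only the changed lines:

import { RemoteRunnable } from "@langchain/core/runnables/remote";

// URL assumed; points at the LangServe endpoint exposed by the api service
const remoteRunnable = new RemoteRunnable({ url: "http://localhost:8000/chat" });

let answer = "";

async function fetchLLMResponse(message: string) {
  try {
    // The chain now takes a plain string, so no { text: ... } wrapper
    const stream = await remoteRunnable.stream(message);
    for await (const chunk of stream) {
      answer += chunk; // append streamed tokens as they arrive
    }
  } catch (err) {
    console.error("streaming failed", err);
  }
}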
