DOCSP-49240: Add LangChain semantic cache and Local RAG notebooks #17

Status: Open. Wants to merge 7 commits into main.
2 changes: 1 addition & 1 deletion ai-integrations/langchain-graphrag.ipynb
@@ -5,7 +5,7 @@
"id": "b5dcbf95-9a30-416d-afed-d5b2bf0e8651",
"metadata": {},
"source": [
"# GraphRAG with MongoDB and LangChain\n",
"# LangChain MongoDB Integration - GraphRAG\n",
"\n",
"This notebook is a companion to the [GraphRAG with MongoDB and LangChain](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/graph-rag/) tutorial. Refer to the page for set-up instructions and detailed explanations.\n",
"\n",
2 changes: 1 addition & 1 deletion ai-integrations/langchain-hybrid-search.ipynb
@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Atlas Vector Search - LangChain Integration - Hybrid Search"
"# LangChain MongoDB Integration - Hybrid Search"
]
},
{
251 changes: 251 additions & 0 deletions ai-integrations/langchain-local-rag.ipynb
@@ -0,0 +1,251 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# LangChain MongoDB Integration - Implement RAG Locally"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook is a companion to the [LangChain Local RAG](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/get-started/) tutorial. Refer to the page for set-up instructions and detailed explanations.\n",
"\n",
"<a target=\"_blank\" href=\"https://colab.research.google.com/github/mongodb/docs-notebooks/blob/main/ai-integrations/langchain-local-rag.ipynb\">\n",
" <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n",
"</a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"source": [
"## Create a local Atlas deployment\n",
"\n",
"Run the following commands in your terminal to set up your local Atlas deployment. \n",
"\n",
"```\n",
"atlas deployments setup\n",
"curl https://atlas-education.s3.amazonaws.com/sampledata.archive -o sampledata.archive\n",
"mongorestore --archive=sampledata.archive --port=<port-number>\n",
"```"
]
},
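{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional check (not part of the original tutorial): ping the local\n",
"# deployment to confirm it is reachable before loading the sample data.\n",
"# Replace <port-number> with the port from `atlas deployments setup`.\n",
"from pymongo import MongoClient\n",
"check_client = MongoClient(\"mongodb://localhost:<port-number>/?directConnection=true\")\n",
"print(check_client.admin.command(\"ping\"))"
]
},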
{
"cell_type": "markdown",
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"source": [
"## Set up the environment"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "shellscript"
}
},
"outputs": [],
"source": [
"pip install --quiet --upgrade pymongo langchain gpt4all sentence_transformers langchain-huggingface"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"MONGODB_URI = (\"mongodb://localhost:64983/?directConnection=true\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create embeddings with local model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pymongo import MongoClient\n",
"from sentence_transformers import SentenceTransformer\n",
"\n",
"# Connect to your local Atlas deployment or Atlas Cluster\n",
"client = MongoClient(MONGODB_URI)\n",
"\n",
"# Select the sample_airbnb.listingsAndReviews collection\n",
"collection = client[\"sample_airbnb\"][\"listingsAndReviews\"]\n",
"\n",
"# Load the embedding model (https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1)\n",
"model_path = \"<path-to-save-model>\"\n",
"model = SentenceTransformer('mixedbread-ai/mxbai-embed-large-v1')\n",
"model.save(model_path)\n",
"model = SentenceTransformer(model_path)\n",
"\n",
"# Define function to generate embeddings\n",
"def get_embedding(text):\n",
" return model.encode(text).tolist()\n",
"\n",
"# Filters for only documents with a summary field and without an embeddings field\n",
"filter = { '$and': [ { 'summary': { '$exists': True, \"$nin\": [ None, \"\" ] } }, { 'embeddings': { '$exists': False } } ] }\n",
"\n",
"# Creates embeddings for subset of the collection\n",
"updated_doc_count = 0\n",
"for document in collection.find(filter).limit(50):\n",
" text = document['summary']\n",
" embedding = get_embedding(text)\n",
" collection.update_one({ '_id': document['_id'] }, { \"$set\": { 'embeddings': embedding } }, upsert=True)\n",
" updated_doc_count += 1\n",
"\n",
"print(\"Documents updated: {}\".format(updated_doc_count))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure the vector store"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"\n",
"embedding_model = HuggingFaceEmbeddings(model_name=\"mixedbread-ai/mxbai-embed-large-v1\")\n",
"\n",
"from langchain_mongodb import MongoDBAtlasVectorSearch\n",
"# Instantiate vector store\n",
"vector_store = MongoDBAtlasVectorSearch.from_connection_string(\n",
" connection_string = MONGODB_URI,\n",
" namespace = \"sample_airbnb.listingsAndReviews\",\n",
" embedding=embedding_model,\n",
" index_name=\"vector_index\",\n",
" embedding_key=\"embeddings\",\n",
" text_key=\"summary\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vector_store.create_vector_search_index(\n",
" dimensions = 1024, # The dimensions of the vector embeddings to be indexed\n",
" wait_until_complete = 60 # Number of seconds to wait for the index to build (can take around a minute)\n",
")"
]
},
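{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check (not part of the original tutorial): run a raw\n",
"# similarity search to confirm the index returns results before\n",
"# building the full RAG chain.\n",
"results = vector_store.similarity_search(\"beach house\", k=3)\n",
"for doc in results:\n",
"    print(doc.page_content[:100])"
]
},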
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Implement RAG with a local LLM\n",
"Before running the following code, [download the local model](https://gpt4all.io/models/gguf/mistral-7b-openorca.gguf2.Q4_0.gguf)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
"from langchain_community.llms import GPT4All\n",
"\n",
"# Configure the LLM\n",
"local_path = \"<path-to-model>\"\n",
"\n",
"# Callbacks support token-wise streaming\n",
"callbacks = [StreamingStdOutCallbackHandler()]\n",
"\n",
"# Verbose is required to pass to the callback manager\n",
"llm = GPT4All(model=local_path, callbacks=callbacks, verbose=True)"
]
},
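{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional check (not part of the original tutorial): invoke the local\n",
"# LLM directly to confirm the model file loads and streams tokens.\n",
"print(llm.invoke(\"What is MongoDB?\"))"
]
},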
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.prompts import PromptTemplate\n",
"from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.runnables import RunnablePassthrough\n",
"import pprint\n",
"\n",
"# Instantiate Atlas Vector Search as a retriever\n",
"retriever = vector_store.as_retriever()\n",
"\n",
"# Define prompt template\n",
"template = \"\"\"\n",
"Use the following pieces of context to answer the question at the end.\n",
"{context}\n",
"Question: {question}\n",
"\"\"\"\n",
"custom_rag_prompt = PromptTemplate.from_template(template)\n",
"\n",
"def format_docs(docs):\n",
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
"\n",
"# Create chain \n",
"rag_chain = (\n",
" {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
" | custom_rag_prompt\n",
" | llm\n",
" | StrOutputParser()\n",
")\n",
"\n",
"# Prompt the chain\n",
"question = \"Can you recommend me a few AirBnBs that are beach houses?\"\n",
"answer = rag_chain.invoke(question)\n",
"\n",
"# Return source documents\n",
"documents = retriever.invoke(question)\n",
"print(\"\\nSource documents:\")\n",
"pprint.pprint(documents)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}