diff --git a/ai-integrations/langchain-graphrag.ipynb b/ai-integrations/langchain-graphrag.ipynb
index e8d1c02..fa49cbb 100644
--- a/ai-integrations/langchain-graphrag.ipynb
+++ b/ai-integrations/langchain-graphrag.ipynb
@@ -5,7 +5,7 @@
"id": "b5dcbf95-9a30-416d-afed-d5b2bf0e8651",
"metadata": {},
"source": [
- "# GraphRAG with MongoDB and LangChain\n",
+ "# LangChain MongoDB Integration - GraphRAG\n",
"\n",
"This notebook is a companion to the [GraphRAG with MongoDB and LangChain](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/graph-rag/) tutorial. Refer to the page for set-up instructions and detailed explanations.\n",
"\n",
diff --git a/ai-integrations/langchain-hybrid-search.ipynb b/ai-integrations/langchain-hybrid-search.ipynb
index aeed8a5..f6b458b 100644
--- a/ai-integrations/langchain-hybrid-search.ipynb
+++ b/ai-integrations/langchain-hybrid-search.ipynb
@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Atlas Vector Search - LangChain Integration - Hybrid Search"
+ "# LangChain MongoDB Integration - Hybrid Search"
]
},
{
diff --git a/ai-integrations/langchain-local-rag.ipynb b/ai-integrations/langchain-local-rag.ipynb
new file mode 100644
index 0000000..6252a22
--- /dev/null
+++ b/ai-integrations/langchain-local-rag.ipynb
@@ -0,0 +1,238 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# LangChain MongoDB Integration - Implement RAG Locally"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This notebook is a companion to the [LangChain Local RAG](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/get-started/) tutorial. Refer to the page for set-up instructions and detailed explanations.\n",
+ "\n",
+ "\n",
+ "
\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "vscode": {
+ "languageId": "shellscript"
+ }
+ },
+ "source": [
+ "## Create a local Atlas deployment\n",
+ "\n",
+ "Run the following commands in your terminal to set up your local Atlas deployment. \n",
+ "\n",
+ "```\n",
+ "atlas deployments setup\n",
+ "curl https://atlas-education.s3.amazonaws.com/sampledata.archive -o sampledata.archive\n",
+ "mongorestore --archive=sampledata.archive --port=\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "vscode": {
+ "languageId": "shellscript"
+ }
+ },
+ "source": [
+ "## Set up the environment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "vscode": {
+ "languageId": "shellscript"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "pip install --quiet --upgrade pymongo langchain gpt4all langchain-huggingface"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "MONGODB_URI = (\"mongodb://localhost:/?directConnection=true\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create embeddings with local model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pymongo import MongoClient\n",
+ "from langchain_huggingface import HuggingFaceEmbeddings\n",
+ "\n",
+ "# Connect to your local Atlas deployment or Atlas Cluster\n",
+ "client = MongoClient(MONGODB_URI)\n",
+ "collection = client[\"sample_airbnb\"][\"listingsAndReviews\"]\n",
+ "\n",
+ "# Load the embedding model (https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1)\n",
+ "embedding_model = HuggingFaceEmbeddings(model_name=\"mixedbread-ai/mxbai-embed-large-v1\")\n",
+ "\n",
+ "# Filters for only documents with a summary field and without an embeddings field\n",
+ "filter = { '$and': [ { 'summary': { '$exists': True, \"$nin\": [ None, \"\" ] } }, { 'embeddings': { '$exists': False } } ] }\n",
+ "\n",
+ "# Creates embeddings for subset of the collection\n",
+ "updated_doc_count = 0\n",
+ "for document in collection.find(filter).limit(50):\n",
+ " text = document['summary']\n",
+ " embedding = embedding_model.embed_documents([text])[0]\n",
+ " collection.update_one({ '_id': document['_id'] }, { \"$set\": { 'embeddings': embedding } }, upsert=True)\n",
+ " updated_doc_count += 1\n",
+ "\n",
+ "print(\"Documents updated: {}\".format(updated_doc_count))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Configure the vector store"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_mongodb import MongoDBAtlasVectorSearch\n",
+ "# Instantiate vector store\n",
+ "vector_store = MongoDBAtlasVectorSearch.from_connection_string(\n",
+ " connection_string = MONGODB_URI,\n",
+ " namespace = \"sample_airbnb.listingsAndReviews\",\n",
+ " embedding=embedding_model,\n",
+ " index_name=\"vector_index\",\n",
+ " embedding_key=\"embeddings\",\n",
+ " text_key=\"summary\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vector_store.create_vector_search_index(\n",
+ " dimensions = 1024, # The dimensions of the vector embeddings to be indexed\n",
+ " wait_until_complete = 60 # Number of seconds to wait for the index to build (can take around a minute)\n",
+ ")"
+ ]
+ },
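+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optionally, you can run a quick similarity search to confirm that the index works before building the RAG chain. This check isn't part of the original tutorial, and the query string is just an example."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sanity check: query the vector store directly\n",
+    "results = vector_store.similarity_search(\"beach house near the ocean\", k=3)\n",
+    "for doc in results:\n",
+    "    print(doc.page_content)"
+   ]
+  },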
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Implement RAG with a local LLM\n",
+ "Before running the following code, [download the local model](https://gpt4all.io/models/gguf/mistral-7b-openorca.gguf2.Q4_0.gguf)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
+ "from langchain_community.llms import GPT4All\n",
+ "\n",
+ "# Configure the LLM\n",
+ "local_path = \"\"\n",
+ "\n",
+ "# Callbacks support token-wise streaming\n",
+ "callbacks = [StreamingStdOutCallbackHandler()]\n",
+ "\n",
+ "# Verbose is required to pass to the callback manager\n",
+ "llm = GPT4All(model=local_path, callbacks=callbacks, verbose=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_core.prompts import PromptTemplate\n",
+ "from langchain_core.output_parsers import StrOutputParser\n",
+ "from langchain_core.runnables import RunnablePassthrough\n",
+ "import pprint\n",
+ "\n",
+ "# Instantiate Atlas Vector Search as a retriever\n",
+ "retriever = vector_store.as_retriever()\n",
+ "\n",
+ "# Define prompt template\n",
+ "template = \"\"\"\n",
+ "Use the following pieces of context to answer the question at the end.\n",
+ "{context}\n",
+ "Question: {question}\n",
+ "\"\"\"\n",
+ "custom_rag_prompt = PromptTemplate.from_template(template)\n",
+ "\n",
+ "def format_docs(docs):\n",
+ " return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
+ "\n",
+ "# Create chain \n",
+ "rag_chain = (\n",
+ " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
+ " | custom_rag_prompt\n",
+ " | llm\n",
+ " | StrOutputParser()\n",
+ ")\n",
+ "\n",
+ "# Prompt the chain\n",
+ "question = \"Can you recommend me a few AirBnBs that are beach houses?\"\n",
+ "answer = rag_chain.invoke(question)\n",
+ "\n",
+ "# Return source documents\n",
+ "documents = retriever.invoke(question)\n",
+ "print(\"\\nSource documents:\")\n",
+ "pprint.pprint(documents)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/ai-integrations/langchain-memory-semantic-cache.ipynb b/ai-integrations/langchain-memory-semantic-cache.ipynb
new file mode 100644
index 0000000..2caa03e
--- /dev/null
+++ b/ai-integrations/langchain-memory-semantic-cache.ipynb
@@ -0,0 +1,351 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "922b6c28",
+ "metadata": {},
+ "source": [
+ "# LangChain MongoDB Integration - Memory and Semantic Caching for RAG"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1fc29d11",
+ "metadata": {},
+ "source": [
+ "This notebook is a companion to the [Memory and Semantic Caching](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/memory-semantic-cache/) tutorial. Refer to the page for set-up instructions and detailed explanations.\n",
+ "\n",
+ "\n",
+ "
\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a289ba35",
+ "metadata": {
+ "vscode": {
+ "languageId": "shellscript"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "pip install --quiet --upgrade langchain langchain-community langchain-core langchain-mongodb langchain-openai"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c672ba1f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
+ "MONGODB_URI = \"\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8384c99d",
+ "metadata": {},
+ "source": [
+ "## Configure the Vector Store"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f90ce770",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_mongodb import MongoDBAtlasVectorSearch\n",
+ "from langchain_openai import OpenAIEmbeddings\n",
+ "\n",
+ "# Use text-embedding-ada-002 since that's what was used to create embeddings in the movies dataset\n",
+ "embedding_model = OpenAIEmbeddings(model=\"text-embedding-ada-002\")\n",
+ "\n",
+ "# Create the vector store\n",
+ "vector_store = MongoDBAtlasVectorSearch.from_connection_string(\n",
+ " connection_string = MONGODB_URI,\n",
+ " embedding = embedding_model,\n",
+ " namespace = \"sample_mflix.embedded_movies\",\n",
+ " text_key = \"plot\",\n",
+ " embedding_key = \"plot_embedding\",\n",
+ " relevance_score_fn = \"dotProduct\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8bf1bff8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Use helper method to create the vector search index\n",
+ "vector_store.create_vector_search_index(\n",
+ " dimensions = 1536, # The dimensions of the vector embeddings to be indexed\n",
+ " wait_until_complete = 60 # Number of seconds to wait for the index to build (can take around a minute)\n",
+ ")"
+ ]
+ },
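+  {
+   "cell_type": "markdown",
+   "id": "9f23a1c0",
+   "metadata": {},
+   "source": [
+    "Optionally, run a quick similarity search to confirm the index is queryable. This check isn't part of the original tutorial; the query string is just an example."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9f23a1c1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sanity check: query the vector store directly\n",
+    "for doc in vector_store.similarity_search(\"time travel adventure\", k=2):\n",
+    "    print(doc.page_content)"
+   ]
+  },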
+ {
+ "cell_type": "markdown",
+ "id": "8c3b6654",
+ "metadata": {},
+ "source": [
+ "## Implement RAG with Memory"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "55583167",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_openai import ChatOpenAI\n",
+ "\n",
+ "# Define the model to use for chat completion\n",
+ "llm = ChatOpenAI(model = \"gpt-4o\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3b3b0361",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory\n",
+ "from langchain_core.runnables.history import RunnableWithMessageHistory\n",
+ "from langchain_core.prompts import MessagesPlaceholder\n",
+ " \n",
+ "# Define a function that gets the chat message history \n",
+ "def get_session_history(session_id: str) -> MongoDBChatMessageHistory:\n",
+ " return MongoDBChatMessageHistory(\n",
+ " connection_string=MONGODB_URI,\n",
+ " session_id=session_id,\n",
+ " database_name=\"sample_mflix\",\n",
+ " collection_name=\"embedded_movies\"\n",
+ " )"
+ ]
+ },
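+  {
+   "cell_type": "markdown",
+   "id": "b1d2e3f4",
+   "metadata": {},
+   "source": [
+    "Optionally, you can verify the history store directly before wiring it into the chain. This cell isn't part of the original tutorial, and the session ID is arbitrary."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b1d2e3f5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional: write a test message and read it back to confirm the history store works\n",
+    "test_history = get_session_history(\"test_session\")\n",
+    "test_history.add_user_message(\"Hello\")\n",
+    "print(test_history.messages)"
+   ]
+  },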
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "74dfa896",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_core.prompts import ChatPromptTemplate\n",
+ "from langchain_core.output_parsers import StrOutputParser\n",
+ "\n",
+ "# Create a prompt to generate standalone questions from follow-up questions\n",
+ "standalone_system_prompt = \"\"\"\n",
+ " Given a chat history and a follow-up question, rephrase the follow-up question to be a standalone question.\n",
+ " Do NOT answer the question, just reformulate it if needed, otherwise return it as is.\n",
+ " Only return the final standalone question.\n",
+ "\"\"\"\n",
+ "\n",
+ "standalone_question_prompt = ChatPromptTemplate.from_messages(\n",
+ " [\n",
+ " (\"system\", standalone_system_prompt),\n",
+ " MessagesPlaceholder(variable_name=\"history\"),\n",
+ " (\"human\", \"{question}\"),\n",
+ " ]\n",
+ ")\n",
+ "# Parse output as a string\n",
+ "parse_output = StrOutputParser()\n",
+ "\n",
+ "question_chain = standalone_question_prompt | llm | parse_output"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c7ad7c83",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_core.runnables import RunnablePassthrough\n",
+ "\n",
+ "# Create a retriever\n",
+ "retriever = vector_store.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": 3})\n",
+ "\n",
+ "# Specify that the retrieved context includes both title and plot fields\n",
+ "get_documents = lambda docs: \"\\n\\n\".join([f\"Title: {d.metadata.get('title')}\\n{d.page_content}\" for d in docs])\n",
+ "\n",
+ "# Create a retriever chain that processes the question with history and retrieves documents\n",
+ "retriever_chain = RunnablePassthrough.assign(\n",
+ " context=question_chain | retriever | get_documents\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c15d460d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a prompt template that includes the retrieved context and chat history\n",
+ "rag_system_prompt = \"\"\"Answer the question based only on the following context:\n",
+ "{context}\n",
+ "\"\"\"\n",
+ "\n",
+ "rag_prompt = ChatPromptTemplate.from_messages(\n",
+ " [\n",
+ " (\"system\", rag_system_prompt),\n",
+ " MessagesPlaceholder(variable_name=\"history\"),\n",
+ " (\"human\", \"{question}\"),\n",
+ " ]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4401715b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Build the RAG chain\n",
+ "rag_chain = (\n",
+ " retriever_chain\n",
+ " | rag_prompt\n",
+ " | llm\n",
+ " | parse_output\n",
+ ")\n",
+ "\n",
+ "# Wrap the chain with message history\n",
+ "rag_with_memory = RunnableWithMessageHistory(\n",
+ " rag_chain,\n",
+ " get_session_history,\n",
+ " input_messages_key=\"question\",\n",
+ " history_messages_key=\"history\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2093d8c8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# First question\n",
+ "response_1 = rag_with_memory.invoke(\n",
+ " {\"question\": \"What are some good science fiction movies?\"},\n",
+ " {\"configurable\": {\"session_id\": \"user_1\"}}\n",
+ ")\n",
+ "print(response_1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "14513bb6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Follow-up question that references the previous question\n",
+ "response_2 = rag_with_memory.invoke(\n",
+ " {\"question\": \"Which one has the best special effects?\"},\n",
+ " {\"configurable\": {\"session_id\": \"user_1\"}}\n",
+ ")\n",
+ "print(response_2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d9b2c3c5",
+ "metadata": {},
+ "source": [
+ "## Add Semantic Caching\n",
+ "\n",
+ "The semantic cache caches only the input to the LLM. When using it in retrieval chains, \n",
+ "note that documents retrieved can change between runs, resulting in cache misses for \n",
+ "semantically similar queries."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "594315fe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_mongodb.cache import MongoDBAtlasSemanticCache\n",
+ "from langchain_core.globals import set_llm_cache\n",
+ "\n",
+ "# Configure the semantic cache\n",
+ "set_llm_cache(MongoDBAtlasSemanticCache(\n",
+ " connection_string = MONGODB_URI,\n",
+ " database_name = \"sample_mflix\",\n",
+ " collection_name = \"semantic_cache\",\n",
+ " embedding = embedding_model,\n",
+ " index_name = \"vector_index\",\n",
+ " similarity_threshold = 0.5 # Adjust based on your requirements\n",
+ "))"
+ ]
+ },
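+  {
+   "cell_type": "markdown",
+   "id": "c5d6e7f8",
+   "metadata": {},
+   "source": [
+    "If you rerun this notebook, you may want to clear previously cached responses first so that the timing comparison below is meaningful. This cell is optional and isn't part of the original tutorial; `get_llm_cache` and `clear` are part of the standard LangChain cache interface."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c5d6e7f9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.globals import get_llm_cache\n",
+    "\n",
+    "# Optional: clear any previously cached entries before timing the queries below\n",
+    "get_llm_cache().clear()"
+   ]
+  },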
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f8063217",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "# First query (not cached)\n",
+ "rag_with_memory.invoke(\n",
+ " {\"question\": \"What is the plot of Titanic?\"},\n",
+ " {\"configurable\": {\"session_id\": \"user_2\"}}\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "df4b0318",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%time\n",
+ "\n",
+ "# Second query (cached)\n",
+ "rag_with_memory.invoke(\n",
+ " {\"question\": \"Tell me about the movie, Titanic\"},\n",
+ " {\"configurable\": {\"session_id\": \"user_2\"}}\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/ai-integrations/langchain-parent-document-retrieval.ipynb b/ai-integrations/langchain-parent-document-retrieval.ipynb
index 3bfbc50..0ba6392 100644
--- a/ai-integrations/langchain-parent-document-retrieval.ipynb
+++ b/ai-integrations/langchain-parent-document-retrieval.ipynb
@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Atlas Vector Search - LangChain Integration - Parent Document Retrieval"
+ "# LangChain MongoDB Integration - Parent Document Retrieval"
]
},
{
@@ -112,18 +112,14 @@
"metadata": {},
"outputs": [],
"source": [
- "import time\n",
- "\n",
"# Get the vector store instance from the retriever\n",
"vector_store = parent_doc_retriever.vectorstore\n",
"\n",
"# Use helper method to create the vector search index\n",
"vector_store.create_vector_search_index(\n",
- " dimensions = 1536 # The dimensions of the vector embeddings to be indexed\n",
- ")\n",
- "\n",
- "# Wait for the index to build (this can take around a minute)\n",
- "time.sleep(60)"
+ " dimensions = 1536, # The dimensions of the vector embeddings to be indexed\n",
+ " wait_until_complete = 60 # Number of seconds to wait for the index to build (can take around a minute)\n",
+ ")\n"
]
},
{
diff --git a/ai-integrations/langchain.ipynb b/ai-integrations/langchain.ipynb
index ba353b8..f275284 100644
--- a/ai-integrations/langchain.ipynb
+++ b/ai-integrations/langchain.ipynb
@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# Atlas Vector Search - LangChain Integration - Get Started"
+ "# LangChain MongoDB Integration - Implement RAG"
]
},
{
@@ -91,16 +91,12 @@
"metadata": {},
"outputs": [],
"source": [
- "import time\n",
- "\n",
"# Use helper method to create the vector search index\n",
"vector_store.create_vector_search_index(\n",
" dimensions = 3072, # The dimensions of the vector embeddings to be indexed\n",
- " filters = [ \"page_label\" ]\n",
- ")\n",
- "\n",
- "# Wait for the index to build (this can take around a minute)\n",
- "time.sleep(60)"
+ " filters = [ \"page_label\" ],\n",
+ " wait_until_complete = 60 # Number of seconds to wait for the index to build (can take around a minute)\n",
+ ")"
]
},
{
diff --git a/ai-integrations/langgraph.ipynb b/ai-integrations/langgraph.ipynb
index bb47d84..7a5071f 100644
--- a/ai-integrations/langgraph.ipynb
+++ b/ai-integrations/langgraph.ipynb
@@ -6,7 +6,7 @@
"id": "3kMALXaMv-MS"
},
"source": [
- "# Atlas Vector Search - LangGraph Integration - RAG Agent"
+ "# LangGraph MongoDB Integration - Agentic RAG"
]
},
{
@@ -112,7 +112,6 @@
"source": [
"from langchain_mongodb.index import create_fulltext_search_index\n",
"from pymongo import MongoClient\n",
- "import time\n",
"\n",
"# Connect to your cluster\n",
"client = MongoClient(MONGODB_URI)\n",
@@ -121,11 +120,9 @@
"create_fulltext_search_index( \n",
" collection = client[\"sample_mflix\"][\"embedded_movies\"],\n",
" field = \"title\",\n",
- " index_name = \"search_index\"\n",
- ")\n",
- "\n",
- "# Wait for the index to build (this can take around a minute)\n",
- "time.sleep(60)"
+ " index_name = \"search_index\",\n",
+ " wait_until_complete = 60 # Number of seconds to wait for the index to build (can take around a minute)\n",
+ ")"
]
},
{
diff --git a/use-cases/local-rag.ipynb b/use-cases/local-rag.ipynb
index 5fd511d..14b7c94 100644
--- a/use-cases/local-rag.ipynb
+++ b/use-cases/local-rag.ipynb
@@ -20,6 +20,21 @@
""
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create a local Atlas deployment\n",
+ "\n",
+ "Run the following commands in your terminal to set up your local Atlas deployment. \n",
+ "\n",
+ "```\n",
+ "atlas deployments setup\n",
+ "curl https://atlas-education.s3.amazonaws.com/sampledata.archive -o sampledata.archive\n",
+ "mongorestore --archive=sampledata.archive --port=\n",
+ "```"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -39,7 +54,7 @@
"metadata": {},
"outputs": [],
"source": [
- "ATLAS_CONNECTION_STRING = (\"\")\n",
+ "MONGODB_URI = (\"\")\n",
"# Use \"mongodb://localhost:/?directConnection=true\" for local Atlas deployments"
]
},
@@ -53,13 +68,13 @@
"from sentence_transformers import SentenceTransformer\n",
"\n",
"# Connect to your local Atlas deployment or Atlas Cluster\n",
- "client = MongoClient(ATLAS_CONNECTION_STRING)\n",
+ "client = MongoClient(MONGODB_URI)\n",
"\n",
"# Select the sample_airbnb.listingsAndReviews collection\n",
"collection = client[\"sample_airbnb\"][\"listingsAndReviews\"]\n",
"\n",
- "# Load the embedding model (https://huggingface.co/sentence-transformers/mixedbread-ai/mxbai-embed-large-v1)\n",
- "model_path = \"\"\n",
+ "# Load the embedding model (https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1)\n",
+ "model_path = \"\"\n",
"model = SentenceTransformer('mixedbread-ai/mxbai-embed-large-v1')\n",
"model.save(model_path)\n",
"model = SentenceTransformer(model_path)\n",