From 1b447477a99cbc7a3922fec2ebdd0b6d5cfe09d6 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Wed, 7 Aug 2024 09:43:49 -0700 Subject: [PATCH 1/4] Update Pinecone docs --- .../integrations/tools/tavily_search.ipynb | 2 +- .../integrations/vectorstores/pinecone.ipynb | 364 ++++++++++++++++++ .../integrations/vectorstores/pinecone.mdx | 52 --- 3 files changed, 365 insertions(+), 53 deletions(-) create mode 100644 docs/core_docs/docs/integrations/vectorstores/pinecone.ipynb delete mode 100644 docs/core_docs/docs/integrations/vectorstores/pinecone.mdx diff --git a/docs/core_docs/docs/integrations/tools/tavily_search.ipynb b/docs/core_docs/docs/integrations/tools/tavily_search.ipynb index 0f00521cddc8..ec6b12c0a0dd 100644 --- a/docs/core_docs/docs/integrations/tools/tavily_search.ipynb +++ b/docs/core_docs/docs/integrations/tools/tavily_search.ipynb @@ -31,7 +31,7 @@ "\n", "| Class | Package | [PY support](https://python.langchain.com/v0.2/docs/integrations/tools/tavily_search/) | Package latest |\n", "| :--- | :--- | :---: | :---: | :---: |\n", - "| [TavilySearchResults](https://api.js.langchain.com/classes/langchain_community_tools_tavily_search.TavilySearchResults.html) | [`@langchain/community`](https://www.npmjs.com/package/@langchain/community) | ✅ | ![NPM - Version](https://img.shields.io/npm/v/__package_name__?style=flat-square&label=%20&) |\n", + "| [TavilySearchResults](https://api.js.langchain.com/classes/langchain_community_tools_tavily_search.TavilySearchResults.html) | [`@langchain/community`](https://www.npmjs.com/package/@langchain/community) | ✅ | ![NPM - Version](https://img.shields.io/npm/v/@langchain/community?style=flat-square&label=%20&) |\n", "\n", "## Setup\n", "\n", diff --git a/docs/core_docs/docs/integrations/vectorstores/pinecone.ipynb b/docs/core_docs/docs/integrations/vectorstores/pinecone.ipynb new file mode 100644 index 000000000000..2fc47869ddec --- /dev/null +++ b/docs/core_docs/docs/integrations/vectorstores/pinecone.ipynb @@ -0,0 
+1,364 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "1957f5cb", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Pinecone\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "ef1f0986", + "metadata": {}, + "source": [ + "# PineconeStore\n", + "\n", + "[Pinecone](https://www.pinecone.io/) is a vector database that helps power AI for some of the world’s best companies.\n", + "\n", + "This guide provides a quick overview for getting started with Pinecone [vector stores](/docs/concepts/#vectorstores). For detailed documentation of all `PineconeStore` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_pinecone.PineconeStore.html)." + ] + }, + { + "cell_type": "markdown", + "id": "c824838d", + "metadata": {}, + "source": [ + "## Overview\n", + "\n", + "### Integration details\n", + "\n", + "| Class | Package | [PY support](https://python.langchain.com/v0.2/docs/integrations/vectorstores/pinecone/) | Package latest |\n", + "| :--- | :--- | :---: | :---: |\n", + "| [`PineconeStore`](https://api.js.langchain.com/classes/langchain_pinecone.PineconeStore.html) | [`@langchain/pinecone`](https://npmjs.com/@langchain/pinecone) | ✅ | ![NPM - Version](https://img.shields.io/npm/v/@langchain/pinecone?style=flat-square&label=%20&) |" + ] + }, + { + "cell_type": "markdown", + "id": "36fdc060", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "To use Pinecone vector stores, you'll need to create a Pinecone account, initialize an index, and install the `@langchain/pinecone` integration package. You'll also want to install the [official Pinecone SDK](https://www.npmjs.com/package/@pinecone-database/pinecone) to initialize a client to pass into the `PineconeStore` instance.\n", + "\n", + "This guide will also use [OpenAI embeddings](/docs/integrations/text_embedding/openai), which require you to install the `@langchain/openai` integration package. 
You can also use [other supported embeddings models](/docs/integrations/text_embedding) if you wish.\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "<IntegrationInstallTooltip></IntegrationInstallTooltip>\n", + "\n", + "<Npm2Yarn>\n", + " @langchain/pinecone @pinecone-database/pinecone @langchain/openai\n", + "</Npm2Yarn>\n", + "```\n", + "\n", + "### Credentials\n", + "\n", + "Sign up for a [Pinecone](https://www.pinecone.io/) account and create an index. Make sure the dimensions match those of the embeddings you want to use (the default is 1536 for OpenAI's `text-embedding-3-small`). Once you've done this, set the `PINECONE_INDEX`, `PINECONE_API_KEY`, and (optionally) `PINECONE_ENVIRONMENT` environment variables:\n", + "\n", + "```typescript\n", + "process.env.PINECONE_API_KEY = \"your-pinecone-api-key\";\n", + "process.env.PINECONE_INDEX = \"your-pinecone-index\";\n", + "\n", + "// Optional\n", + "process.env.PINECONE_ENVIRONMENT = \"your-pinecone-environment\";\n", + "```\n", + "\n", + "If you are using OpenAI embeddings for this guide, you'll need to set your OpenAI key as well:\n", + "\n", + "```typescript\n", + "process.env.OPENAI_API_KEY = \"YOUR_API_KEY\";\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGCHAIN_TRACING_V2=\"true\"\n", + "// process.env.LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "93df377e", + "metadata": {}, + "source": [ + "## Instantiation" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "dc37144c-208d-4ab3-9f3a-0407a69fe052", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import { PineconeStore } from \"@langchain/pinecone\";\n", + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "\n", +
"import { Pinecone as PineconeClient } from \"@pinecone-database/pinecone\";\n", + "\n", + "const embeddings = new OpenAIEmbeddings({\n", + " model: \"text-embedding-3-small\",\n", + "});\n", + "\n", + "const pinecone = new PineconeClient();\n", + "// Will automatically read the PINECONE_API_KEY and PINECONE_ENVIRONMENT env vars\n", + "const pineconeIndex = pinecone.Index(process.env.PINECONE_INDEX!);\n", + "\n", + "const vectorStore = await PineconeStore.fromExistingIndex(\n", + " embeddings,\n", + " {\n", + " pineconeIndex,\n", + " // Maximum number of batch requests to allow at once. Each batch is 1000 vectors.\n", + " maxConcurrency: 5,\n", + " // You can pass a namespace here too\n", + " // namespace: \"foo\",\n", + " }\n", + ");" + ] + }, + { + "cell_type": "markdown", + "id": "ac6071d4", + "metadata": {}, + "source": [ + "## Manage vector store\n", + "\n", + "### Add items to vector store" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "17f5efc0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ '1', '2', '3', '4' ]\n" + ] + } + ], + "source": [ + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const document1: Document = {\n", + " pageContent: \"The powerhouse of the cell is the mitochondria\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document2: Document = {\n", + " pageContent: \"Buildings are made out of brick\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document3: Document = {\n", + " pageContent: \"Mitochondria are made out of lipids\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document4: Document = {\n", + " pageContent: \"The 2024 Olympics are in Paris\",\n", + " metadata: { source: \"https://example.com\" }\n", + "}\n", + "\n", + "const documents = [document1, document2, document3, document4];\n", + "\n", + "await 
vectorStore.addDocuments(documents, { ids: [\"1\", \"2\", \"3\", \"4\"] });" + ] + }, + { + "cell_type": "markdown", + "id": "dcf1b905", + "metadata": {}, + "source": [ + "### Delete items from vector store" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ef61e188", + "metadata": {}, + "outputs": [], + "source": [ + "await vectorStore.delete({ ids: [\"4\"] });" + ] + }, + { + "cell_type": "markdown", + "id": "c3620501", + "metadata": {}, + "source": [ + "## Query vector store\n", + "\n", + "Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n", + "\n", + "### Query directly\n", + "\n", + "Performing a simple similarity search can be done as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "aa0a16fa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* The powerhouse of the cell is the mitochondria [{\"source\":\"https://example.com\"}]\n", + "* Mitochondria are made out of lipids [{\"source\":\"https://example.com\"}]\n" + ] + } + ], + "source": [ + "// Optional filter\n", + "const filter = { source: \"https://example.com\" };\n", + "\n", + "const similaritySearchResults = await vectorStore.similaritySearch(\"biology\", 2, filter);\n", + "\n", + "for (const doc of similaritySearchResults) {\n", + " console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "3ed9d733", + "metadata": {}, + "source": [ + "If you want to execute a similarity search and receive the corresponding scores you can run:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5efd2eaa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* [SIM=0.165] The powerhouse of the cell is the mitochondria [{\"source\":\"https://example.com\"}]\n", + "* 
[SIM=0.148] Mitochondria are made out of lipids [{\"source\":\"https://example.com\"}]\n" + ] + } + ], + "source": [ + "const similaritySearchWithScoreResults = await vectorStore.similaritySearchWithScore(\"biology\", 2, filter)\n", + "\n", + "for (const [doc, score] of similaritySearchWithScoreResults) {\n", + " console.log(`* [SIM=${score.toFixed(3)}] ${doc.pageContent} [${JSON.stringify(doc.metadata)}]`);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "0c235cdc", + "metadata": {}, + "source": [ + "### Query by turning into retriever\n", + "\n", + "You can also transform the vector store into a [retriever](/docs/concepts/#retrievers) for easier usage in your chains. " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f3460093", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: 'The powerhouse of the cell is the mitochondria',\n", + " metadata: { source: 'https://example.com' },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Mitochondria are made out of lipids',\n", + " metadata: { source: 'https://example.com' },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "const retriever = vectorStore.asRetriever({\n", + " // Optional filter\n", + " filter: filter,\n", + " k: 2,\n", + "});\n", + "\n", + "await retriever.invoke(\"biology\");" + ] + }, + { + "cell_type": "markdown", + "id": "e2e0a211", + "metadata": {}, + "source": [ + "### Usage for retrieval-augmented generation\n", + "\n", + "For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n", + "\n", + "- [Tutorials: working with external knowledge](/docs/tutorials/#working-with-external-knowledge).\n", + "- [How-to: Question and answer with RAG](/docs/how_to/#qa-with-rag)\n", + "- [Retrieval conceptual docs](/docs/concepts#retrieval)" + ] + }, + { + "cell_type": "markdown", + "id": 
"8a27244f", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all `PineconeStore` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_pinecone.PineconeStore.html)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/vectorstores/pinecone.mdx b/docs/core_docs/docs/integrations/vectorstores/pinecone.mdx deleted file mode 100644 index d7654489f435..000000000000 --- a/docs/core_docs/docs/integrations/vectorstores/pinecone.mdx +++ /dev/null @@ -1,52 +0,0 @@ -# Pinecone - -You can use [Pinecone](https://www.pinecone.io/) vectorstores with LangChain. -To get started, install the integration package and the official Pinecone SDK with: - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install -S @langchain/pinecone @pinecone-database/pinecone -``` - -The below examples use OpenAI embeddings, but you can swap in whichever provider you'd like. 
-Keep in mind different embeddings models may have a different number of dimensions: - -```bash npm2yarn -npm install -S @langchain/openai -``` - -## Index docs - -import CodeBlock from "@theme/CodeBlock"; -import IndexExample from "@examples/indexes/vector_stores/pinecone/index_docs.ts"; - -{IndexExample} - -## Query docs - -import QueryExample from "@examples/indexes/vector_stores/pinecone/query_docs.ts"; - -{QueryExample} - -## Delete docs - -import DeleteExample from "@examples/indexes/vector_stores/pinecone/delete_docs.ts"; - -{DeleteExample} - -## Maximal marginal relevance search - -Pinecone supports maximal marginal relevance search, which takes a combination of documents -that are most similar to the inputs, then reranks and optimizes for diversity. - -import MMRExample from "@examples/indexes/vector_stores/pinecone/mmr.ts"; - -{MMRExample} - -## Related - -- Vector store [conceptual guide](/docs/concepts/#vectorstores) -- Vector store [how-to guides](/docs/how_to/#vectorstores) From 4ebbd8954fe7a404757099dca82d8dfd3d3113b7 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Wed, 7 Aug 2024 10:01:56 -0700 Subject: [PATCH 2/4] Update Qdrant docs --- .../integrations/vectorstores/qdrant.ipynb | 337 ++++++++++++++++++ .../docs/integrations/vectorstores/qdrant.mdx | 58 --- 2 files changed, 337 insertions(+), 58 deletions(-) create mode 100644 docs/core_docs/docs/integrations/vectorstores/qdrant.ipynb delete mode 100644 docs/core_docs/docs/integrations/vectorstores/qdrant.mdx diff --git a/docs/core_docs/docs/integrations/vectorstores/qdrant.ipynb b/docs/core_docs/docs/integrations/vectorstores/qdrant.ipynb new file mode 100644 index 000000000000..807f71ffe2c1 --- /dev/null +++ b/docs/core_docs/docs/integrations/vectorstores/qdrant.ipynb @@ -0,0 +1,337 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "1957f5cb", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Qdrant\n", + "---" + ] + }, + { + "cell_type": 
"markdown", + "id": "ef1f0986", + "metadata": {}, + "source": [ + "# QdrantVectorStore\n", + "\n", + "[Qdrant](https://qdrant.tech/) is a vector similarity search engine. It provides a production-ready service with a convenient API to store, search, and manage points - vectors with an additional payload.\n", + "\n", + "This guide provides a quick overview for getting started with Qdrant [vector stores](/docs/concepts/#vectorstores). For detailed documentation of all `QdrantVectorStore` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_qdrant.QdrantVectorStore.html)." + ] + }, + { + "cell_type": "markdown", + "id": "c824838d", + "metadata": {}, + "source": [ + "## Overview\n", + "\n", + "### Integration details\n", + "\n", + "| Class | Package | [PY support](https://python.langchain.com/v0.2/docs/integrations/vectorstores/qdrant/) | Package latest |\n", + "| :--- | :--- | :---: | :---: |\n", + "| [`QdrantVectorStore`](https://api.js.langchain.com/classes/langchain_qdrant.QdrantVectorStore.html) | [`@langchain/qdrant`](https://npmjs.com/@langchain/qdrant) | ✅ | ![NPM - Version](https://img.shields.io/npm/v/@langchain/qdrant?style=flat-square&label=%20&) |" + ] + }, + { + "cell_type": "markdown", + "id": "36fdc060", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "To use Qdrant vector stores, you'll need to set up a Qdrant instance and install the `@langchain/qdrant` integration package.\n", + "\n", + "This guide will also use [OpenAI embeddings](/docs/integrations/text_embedding/openai), which require you to install the `@langchain/openai` integration package. 
You can also use [other supported embeddings models](/docs/integrations/text_embedding) if you wish.\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "<IntegrationInstallTooltip></IntegrationInstallTooltip>\n", + "\n", + "<Npm2Yarn>\n", + " @langchain/qdrant @langchain/openai\n", + "</Npm2Yarn>\n", + "```\n", + "\n", + "After installing the required dependencies, run a Qdrant instance with Docker on your computer by following the [Qdrant setup instructions](https://qdrant.tech/documentation/quickstart/). Note the URL your container runs on.\n", + "\n", + "### Credentials\n", + "\n", + "Once you've done this, set a `QDRANT_URL` environment variable:\n", + "\n", + "```typescript\n", + "// e.g. http://localhost:6333\n", + "process.env.QDRANT_URL = \"your-qdrant-url\"\n", + "```\n", + "\n", + "If you are using OpenAI embeddings for this guide, you'll need to set your OpenAI key as well:\n", + "\n", + "```typescript\n", + "process.env.OPENAI_API_KEY = \"YOUR_API_KEY\";\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGCHAIN_TRACING_V2=\"true\"\n", + "// process.env.LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "93df377e", + "metadata": {}, + "source": [ + "## Instantiation" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "dc37144c-208d-4ab3-9f3a-0407a69fe052", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import { QdrantVectorStore } from \"@langchain/qdrant\";\n", + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "\n", + "const embeddings = new OpenAIEmbeddings({\n", + " model: \"text-embedding-3-small\",\n", + "});\n", + "\n", + "const vectorStore = await QdrantVectorStore.fromExistingCollection(embeddings, {\n", + "
url: process.env.QDRANT_URL,\n", + " collectionName: \"langchainjs-testing\",\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "ac6071d4", + "metadata": {}, + "source": [ + "## Manage vector store\n", + "\n", + "### Add items to vector store" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "17f5efc0", + "metadata": {}, + "outputs": [], + "source": [ + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const document1: Document = {\n", + " pageContent: \"The powerhouse of the cell is the mitochondria\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document2: Document = {\n", + " pageContent: \"Buildings are made out of brick\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document3: Document = {\n", + " pageContent: \"Mitochondria are made out of lipids\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document4: Document = {\n", + " pageContent: \"The 2024 Olympics are in Paris\",\n", + " metadata: { source: \"https://example.com\" }\n", + "}\n", + "\n", + "const documents = [document1, document2, document3, document4];\n", + "\n", + "await vectorStore.addDocuments(documents);" + ] + }, + { + "cell_type": "markdown", + "id": "dcf1b905", + "metadata": {}, + "source": [ + "Top-level document ids and deletion are currently not supported." + ] + }, + { + "cell_type": "markdown", + "id": "c3620501", + "metadata": {}, + "source": [ + "## Query vector store\n", + "\n", + "Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. 
\n", + "\n", + "### Query directly\n", + "\n", + "Performing a simple similarity search can be done as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "aa0a16fa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* The powerhouse of the cell is the mitochondria [{\"source\":\"https://example.com\"}]\n", + "* Mitochondria are made out of lipids [{\"source\":\"https://example.com\"}]\n" + ] + } + ], + "source": [ + "const filter = {\n", + " \"must\": [\n", + " { \"key\": \"metadata.source\", \"match\": { \"value\": \"https://example.com\" } },\n", + " ]\n", + "};\n", + "\n", + "const similaritySearchResults = await vectorStore.similaritySearch(\"biology\", 2, filter);\n", + "\n", + "for (const doc of similaritySearchResults) {\n", + " console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "3ed9d733", + "metadata": {}, + "source": [ + "See [this page](https://qdrant.tech/documentation/concepts/filtering/) for more on Qdrant filter syntax. 
Note that all filter keys must be prefixed with `metadata.` (e.g. `metadata.source`).\n", + "\n", + "If you want to execute a similarity search and receive the corresponding scores you can run:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5efd2eaa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* [SIM=0.165] The powerhouse of the cell is the mitochondria [{\"source\":\"https://example.com\"}]\n", + "* [SIM=0.148] Mitochondria are made out of lipids [{\"source\":\"https://example.com\"}]\n" + ] + } + ], + "source": [ + "const similaritySearchWithScoreResults = await vectorStore.similaritySearchWithScore(\"biology\", 2, filter)\n", + "\n", + "for (const [doc, score] of similaritySearchWithScoreResults) {\n", + " console.log(`* [SIM=${score.toFixed(3)}] ${doc.pageContent} [${JSON.stringify(doc.metadata)}]`);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "0c235cdc", + "metadata": {}, + "source": [ + "### Query by turning into retriever\n", + "\n", + "You can also transform the vector store into a [retriever](/docs/concepts/#retrievers) for easier usage in your chains.
" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f3460093", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: 'The powerhouse of the cell is the mitochondria',\n", + " metadata: { source: 'https://example.com' },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Mitochondria are made out of lipids',\n", + " metadata: { source: 'https://example.com' },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "const retriever = vectorStore.asRetriever({\n", + " // Optional filter\n", + " filter: filter,\n", + " k: 2,\n", + "});\n", + "await retriever.invoke(\"biology\");" + ] + }, + { + "cell_type": "markdown", + "id": "e2e0a211", + "metadata": {}, + "source": [ + "### Usage for retrieval-augmented generation\n", + "\n", + "For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n", + "\n", + "- [Tutorials: working with external knowledge](/docs/tutorials/#working-with-external-knowledge).\n", + "- [How-to: Question and answer with RAG](/docs/how_to/#qa-with-rag)\n", + "- [Retrieval conceptual docs](/docs/concepts#retrieval)" + ] + }, + { + "cell_type": "markdown", + "id": "8a27244f", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all `QdrantVectorStore` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_qdrant.QdrantVectorStore.html)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/vectorstores/qdrant.mdx b/docs/core_docs/docs/integrations/vectorstores/qdrant.mdx deleted file mode 100644 index 02cb4d7ac80b..000000000000 --- a/docs/core_docs/docs/integrations/vectorstores/qdrant.mdx +++ /dev/null @@ -1,58 +0,0 @@ ---- -sidebar_class_name: node-only ---- - -# Qdrant - -[Qdrant](https://qdrant.tech/) is a vector similarity search engine. It provides a production-ready service with a convenient API to store, search, and manage points - vectors with an additional payload. - -## Setup - -1. Run a Qdrant instance with Docker on your computer by following the [Qdrant setup instructions](https://qdrant.tech/documentation/quick-start/). -2. Install the Qdrant Node.js SDK. - - ```bash npm2yarn - npm install -S @langchain/qdrant - ``` - -3. 
Setup Env variables for Qdrant before running the code - - ```bash - export OPENAI_API_KEY=YOUR_OPENAI_API_KEY_HERE - export QDRANT_URL=YOUR_QDRANT_URL_HERE # for example http://localhost:6333 - ``` - -import CodeBlock from "@theme/CodeBlock"; - -## Usage - -### Create a new index from texts - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/openai @langchain/community -``` - -import TextsExample from "@examples/indexes/vector_stores/qdrant/fromTexts.ts"; - -{TextsExample} - -### Create a new index from docs - -import DocsExample from "@examples/indexes/vector_stores/qdrant/fromDocs.ts"; - -{DocsExample} - -### Query docs from existing collection - -import ExistingExample from "@examples/indexes/vector_stores/qdrant/fromExisting.ts"; - -{ExistingExample} - -## Related - -- Vector store [conceptual guide](/docs/concepts/#vectorstores) -- Vector store [how-to guides](/docs/how_to/#vectorstores) From e23381e226e7523c843960bff3c6ceb668f2903e Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Wed, 7 Aug 2024 12:05:32 -0700 Subject: [PATCH 3/4] Update vector stores --- .../vectorstores/mongodb_atlas.ipynb | 2 + .../integrations/vectorstores/pinecone.ipynb | 2 + .../integrations/vectorstores/qdrant.ipynb | 7 + .../integrations/vectorstores/redis.ipynb | 373 +++++++++++++++ .../docs/integrations/vectorstores/redis.mdx | 64 --- .../integrations/vectorstores/supabase.ipynb | 440 ++++++++++++++++++ .../integrations/vectorstores/supabase.mdx | 112 ----- .../integrations/vectorstores/upstash.ipynb | 363 +++++++++++++++ .../integrations/vectorstores/upstash.mdx | 67 --- .../integrations/vectorstores/weaviate.ipynb | 387 +++++++++++++++ .../integrations/vectorstores/weaviate.mdx | 57 --- .../src/cli/docs/templates/vectorstores.ipynb | 4 +- 12 files changed, 1576 insertions(+), 302 deletions(-) create mode 100644 docs/core_docs/docs/integrations/vectorstores/redis.ipynb delete mode 100644 
docs/core_docs/docs/integrations/vectorstores/redis.mdx create mode 100644 docs/core_docs/docs/integrations/vectorstores/supabase.ipynb delete mode 100644 docs/core_docs/docs/integrations/vectorstores/supabase.mdx create mode 100644 docs/core_docs/docs/integrations/vectorstores/upstash.ipynb delete mode 100644 docs/core_docs/docs/integrations/vectorstores/upstash.mdx create mode 100644 docs/core_docs/docs/integrations/vectorstores/weaviate.ipynb delete mode 100644 docs/core_docs/docs/integrations/vectorstores/weaviate.mdx diff --git a/docs/core_docs/docs/integrations/vectorstores/mongodb_atlas.ipynb b/docs/core_docs/docs/integrations/vectorstores/mongodb_atlas.ipynb index 00053103059f..2f4b738372f6 100644 --- a/docs/core_docs/docs/integrations/vectorstores/mongodb_atlas.ipynb +++ b/docs/core_docs/docs/integrations/vectorstores/mongodb_atlas.ipynb @@ -234,6 +234,8 @@ "id": "dcf1b905", "metadata": {}, "source": [ + "**Note:** After adding documents, there is a slight delay before they become queryable.\n", + "\n", "Adding a document with the same `id` as an existing document will update the existing one.\n", "\n", "### Delete items from vector store" diff --git a/docs/core_docs/docs/integrations/vectorstores/pinecone.ipynb b/docs/core_docs/docs/integrations/vectorstores/pinecone.ipynb index 2fc47869ddec..d209cff67ce9 100644 --- a/docs/core_docs/docs/integrations/vectorstores/pinecone.ipynb +++ b/docs/core_docs/docs/integrations/vectorstores/pinecone.ipynb @@ -187,6 +187,8 @@ "id": "dcf1b905", "metadata": {}, "source": [ + "**Note:** After adding documents, there is a slight delay before they become queryable.\n", + "\n", "### Delete items from vector store" ] }, diff --git a/docs/core_docs/docs/integrations/vectorstores/qdrant.ipynb b/docs/core_docs/docs/integrations/vectorstores/qdrant.ipynb index 807f71ffe2c1..2fcd92d82813 100644 --- a/docs/core_docs/docs/integrations/vectorstores/qdrant.ipynb +++ b/docs/core_docs/docs/integrations/vectorstores/qdrant.ipynb @@ 
-11,6 +11,7 @@ "source": [ "---\n", "sidebar_label: Qdrant\n", + "sidebar_class_name: node-only\n", "---" ] }, @@ -21,6 +22,12 @@ "source": [ "# QdrantVectorStore\n", "\n", + "```{=mdx}\n", + ":::tip Compatibility\n", + "Only available on Node.js.\n", + ":::\n", + "```\n", + "\n", "[Qdrant](https://qdrant.tech/) is a vector similarity search engine. It provides a production-ready service with a convenient API to store, search, and manage points - vectors with an additional payload.\n", "\n", "This guide provides a quick overview for getting started with Qdrant [vector stores](/docs/concepts/#vectorstores). For detailed documentation of all `QdrantVectorStore` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_qdrant.QdrantVectorStore.html)." diff --git a/docs/core_docs/docs/integrations/vectorstores/redis.ipynb b/docs/core_docs/docs/integrations/vectorstores/redis.ipynb new file mode 100644 index 000000000000..f47baffc7e8e --- /dev/null +++ b/docs/core_docs/docs/integrations/vectorstores/redis.ipynb @@ -0,0 +1,373 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "1957f5cb", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Redis\n", + "sidebar_class_name: node-only\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "ef1f0986", + "metadata": {}, + "source": [ + "# RedisVectorStore\n", + "\n", + "```{=mdx}\n", + ":::tip Compatibility\n", + "Only available on Node.js.\n", + ":::\n", + "```\n", + "\n", + "[Redis](https://redis.io/) is a fast open source, in-memory data store. 
As part of the [Redis Stack](https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/), [RediSearch](https://redis.io/docs/latest/develop/interact/search-and-query/) is the module that enables vector similarity semantic search, as well as many other types of searching.\n", + "\n", + "This guide provides a quick overview for getting started with Redis [vector stores](/docs/concepts/#vectorstores). For detailed documentation of all `RedisVectorStore` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_redis.RedisVectorStore.html)." + ] + }, + { + "cell_type": "markdown", + "id": "c824838d", + "metadata": {}, + "source": [ + "## Overview\n", + "\n", + "### Integration details\n", + "\n", + "| Class | Package | [PY support](https://python.langchain.com/v0.2/docs/integrations/vectorstores/redis/) | Package latest |\n", + "| :--- | :--- | :---: | :---: |\n", + "| [`RedisVectorStore`](https://api.js.langchain.com/classes/langchain_redis.RedisVectorStore.html) | [`@langchain/redis`](https://npmjs.com/@langchain/redis/) | ✅ | ![NPM - Version](https://img.shields.io/npm/v/@langchain/redis?style=flat-square&label=%20&) |" + ] + }, + { + "cell_type": "markdown", + "id": "36fdc060", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "To use Redis vector stores, you'll need to set up a Redis instance and install the `@langchain/redis` integration package. You can also install the [`node-redis`](https://github.com/redis/node-redis) package to initialize the vector store with a specific client instance.\n", + "\n", + "This guide will also use [OpenAI embeddings](/docs/integrations/text_embedding/openai), which require you to install the `@langchain/openai` integration package. 
You can also use [other supported embeddings models](/docs/integrations/text_embedding) if you wish.\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "<IntegrationInstallTooltip></IntegrationInstallTooltip>\n", + "\n", + "<Npm2Yarn>\n", + " @langchain/redis redis @langchain/openai\n", + "</Npm2Yarn>\n", + "```\n", + "\n", + "You can set up a Redis instance locally with Docker by following [these instructions](https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/docker/#redisredis-stack).\n", + "\n", + "### Credentials\n", + "\n", + "Once you've set up an instance, set the `REDIS_URL` environment variable:\n", + "\n", + "```typescript\n", + "process.env.REDIS_URL = \"your-redis-url\"\n", + "```\n", + "\n", + "If you are using OpenAI embeddings for this guide, you'll need to set your OpenAI key as well:\n", + "\n", + "```typescript\n", + "process.env.OPENAI_API_KEY = \"YOUR_API_KEY\";\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGCHAIN_TRACING_V2=\"true\"\n", + "// process.env.LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "93df377e", + "metadata": {}, + "source": [ + "## Instantiation" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "dc37144c-208d-4ab3-9f3a-0407a69fe052", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import { RedisVectorStore } from \"@langchain/redis\";\n", + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "\n", + "import { createClient } from \"redis\";\n", + "\n", + "const embeddings = new OpenAIEmbeddings({\n", + " model: \"text-embedding-3-small\",\n", + "});\n", + "\n", + "const client = createClient({\n", + " url: process.env.REDIS_URL ?? 
\"redis://localhost:6379\",\n", + "});\n", + "await client.connect();\n", + "\n", + "const vectorStore = new RedisVectorStore(embeddings, {\n", + " redisClient: client,\n", + " indexName: \"langchainjs-testing\",\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "ac6071d4", + "metadata": {}, + "source": [ + "## Manage vector store\n", + "\n", + "### Add items to vector store" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "17f5efc0", + "metadata": {}, + "outputs": [], + "source": [ + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const document1: Document = {\n", + " pageContent: \"The powerhouse of the cell is the mitochondria\",\n", + " metadata: { type: \"example\" }\n", + "};\n", + "\n", + "const document2: Document = {\n", + " pageContent: \"Buildings are made out of brick\",\n", + " metadata: { type: \"example\" }\n", + "};\n", + "\n", + "const document3: Document = {\n", + " pageContent: \"Mitochondria are made out of lipids\",\n", + " metadata: { type: \"example\" }\n", + "};\n", + "\n", + "const document4: Document = {\n", + " pageContent: \"The 2024 Olympics are in Paris\",\n", + " metadata: { type: \"example\" }\n", + "}\n", + "\n", + "const documents = [document1, document2, document3, document4];\n", + "\n", + "await vectorStore.addDocuments(documents);" + ] + }, + { + "cell_type": "markdown", + "id": "dcf1b905", + "metadata": {}, + "source": [ + "Top-level document ids and deletion are currently not supported." + ] + }, + { + "cell_type": "markdown", + "id": "c3620501", + "metadata": {}, + "source": [ + "## Query vector store\n", + "\n", + "Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. 
\n", + "\n", + "### Query directly\n", + "\n", + "Performing a simple similarity search can be done as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "aa0a16fa", + "metadata": {}, + "outputs": [], + "source": [ + "const similaritySearchResults = await vectorStore.similaritySearch(\"biology\", 2);\n", + "\n", + "for (const doc of similaritySearchResults) {\n", + " console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "3ed9d733", + "metadata": {}, + "source": [ + "Filtering will currently look for any metadata key containing the provided string.\n", + "\n", + "If you want to execute a similarity search and receive the corresponding scores you can run:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5efd2eaa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* [SIM=0.835] The powerhouse of the cell is the mitochondria [{\"type\":\"example\"}]\n", + "* [SIM=0.852] Mitochondria are made out of lipids [{\"type\":\"example\"}]\n" + ] + } + ], + "source": [ + "const similaritySearchWithScoreResults = await vectorStore.similaritySearchWithScore(\"biology\", 2)\n", + "\n", + "for (const [doc, score] of similaritySearchWithScoreResults) {\n", + " console.log(`* [SIM=${score.toFixed(3)}] ${doc.pageContent} [${JSON.stringify(doc.metadata)}]`);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "0c235cdc", + "metadata": {}, + "source": [ + "### Query by turning into retriever\n", + "\n", + "You can also transform the vector store into a [retriever](/docs/concepts/#retrievers) for easier usage in your chains. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f3460093", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: 'The powerhouse of the cell is the mitochondria',\n", + " metadata: { type: 'example' },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Mitochondria are made out of lipids',\n", + " metadata: { type: 'example' },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "const retriever = vectorStore.asRetriever({\n", + " k: 2,\n", + "});\n", + "await retriever.invoke(\"biology\");" + ] + }, + { + "cell_type": "markdown", + "id": "e2e0a211", + "metadata": {}, + "source": [ + "### Usage for retrieval-augmented generation\n", + "\n", + "For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n", + "\n", + "- [Tutorials: working with external knowledge](/docs/tutorials/#working-with-external-knowledge).\n", + "- [How-to: Question and answer with RAG](/docs/how_to/#qa-with-rag)\n", + "- [Retrieval conceptual docs](/docs/concepts#retrieval)" + ] + }, + { + "cell_type": "markdown", + "id": "069f1b5f", + "metadata": {}, + "source": [ + "## Deleting an index\n", + "\n", + "You can delete an entire index with the following command:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f71ce986", + "metadata": {}, + "outputs": [], + "source": [ + "await vectorStore.delete({ deleteAll: true });" + ] + }, + { + "cell_type": "markdown", + "id": "bf2357b3", + "metadata": {}, + "source": [ + "## Closing connections\n", + "\n", + "Make sure you close the client connection when you are finished to avoid excessive resource consumption:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "48a98cba", + "metadata": {}, + "outputs": [], + "source": [ + "await client.disconnect();" + ] + }, + { + "cell_type": "markdown", + "id": 
"8a27244f", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all `RedisVectorStore` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_redis.RedisVectorStore.html)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/vectorstores/redis.mdx b/docs/core_docs/docs/integrations/vectorstores/redis.mdx deleted file mode 100644 index 5a49a77169f2..000000000000 --- a/docs/core_docs/docs/integrations/vectorstores/redis.mdx +++ /dev/null @@ -1,64 +0,0 @@ ---- -sidebar_class_name: node-only ---- - -import CodeBlock from "@theme/CodeBlock"; - -# Redis - -[Redis](https://redis.io/) is a fast open source, in-memory data store. -As part of the [Redis Stack](https://redis.io/docs/stack/get-started/), [RediSearch](https://redis.io/docs/stack/search/) is the module that enables vector similarity semantic search, as well as many other types of searching. - -:::tip Compatibility -Only available on Node.js. -::: - -LangChain.js accepts [node-redis](https://github.com/redis/node-redis) as the client for Redis vectorstore. - -## Setup - -1. Run Redis with Docker on your computer following [the docs](https://redis.io/docs/stack/get-started/install/docker/#redisredis-stack) -2. 
Install the node-redis JS client - -```bash npm2yarn -npm install -S redis -``` - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/openai @langchain/core @langchain/redis langchain -``` - -## Index docs - -import IndexExample from "@examples/indexes/vector_stores/redis/redis.ts"; - -{IndexExample} - -## Query docs - -import QueryExample from "@examples/indexes/vector_stores/redis/redis_query.ts"; - -{QueryExample} - -## Create index with options - -To pass arguments for [index creation](https://redis.io/commands/ft.create/), you can utilize the [available options](https://github.com/redis/node-redis/blob/294cbf8367295ac81cbe51ce2932493ab80493f1/packages/search/lib/commands/CREATE.ts#L4) offered by [node-redis](https://github.com/redis/node-redis) through `createIndexOptions` parameter. - -import IndexOptions from "@examples/indexes/vector_stores/redis/redis_index_options.ts"; - -{IndexOptions} - -## Delete an index - -import DeleteExample from "@examples/indexes/vector_stores/redis/redis_delete.ts"; - -{DeleteExample} - -## Related - -- Vector store [conceptual guide](/docs/concepts/#vectorstores) -- Vector store [how-to guides](/docs/how_to/#vectorstores) diff --git a/docs/core_docs/docs/integrations/vectorstores/supabase.ipynb b/docs/core_docs/docs/integrations/vectorstores/supabase.ipynb new file mode 100644 index 000000000000..20145dca81ed --- /dev/null +++ b/docs/core_docs/docs/integrations/vectorstores/supabase.ipynb @@ -0,0 +1,440 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "1957f5cb", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Supabase\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "ef1f0986", + "metadata": {}, + "source": [ + "# SupabaseVectorStore\n", + "\n", + "[Supabase](https://supabase.com/docs) is an open-source Firebase alternative. 
Supabase is built on top of PostgreSQL, which offers strong SQL querying capabilities and enables a simple interface with already-existing tools and frameworks.\n", + "\n", + "LangChain.js supports using a Supabase Postgres database as a vector store, using the [`pgvector`](https://github.com/pgvector/pgvector) extension. Refer to the [Supabase blog post](https://supabase.com/blog/openai-embeddings-postgres-vector) for more information.\n", + "\n", + "This guide provides a quick overview for getting started with Supabase [vector stores](/docs/concepts/#vectorstores). For detailed documentation of all `SupabaseVectorStore` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_community_vectorstores_supabase.SupabaseVectorStore.html)." + ] + }, + { + "cell_type": "markdown", + "id": "c824838d", + "metadata": {}, + "source": [ + "## Overview\n", + "\n", + "### Integration details\n", + "\n", + "| Class | Package | [PY support](https://python.langchain.com/v0.2/docs/integrations/vectorstores/supabase/) | Package latest |\n", + "| :--- | :--- | :---: | :---: |\n", + "| [`SupabaseVectorStore`](https://api.js.langchain.com/classes/langchain_community_vectorstores_supabase.SupabaseVectorStore.html) | [`@langchain/community`](https://npmjs.com/@langchain/community) | ✅ | ![NPM - Version](https://img.shields.io/npm/v/@langchain/community?style=flat-square&label=%20&) |" + ] + }, + { + "cell_type": "markdown", + "id": "36fdc060", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "To use Supabase vector stores, you'll need to set up a Supabase database and install the `@langchain/community` integration package. 
You'll also need to install the official [`@supabase/supabase-js`](https://www.npmjs.com/package/@supabase/supabase-js) SDK as a peer dependency.\n", + "\n", + "This guide will also use [OpenAI embeddings](/docs/integrations/text_embedding/openai), which require you to install the `@langchain/openai` integration package. You can also use [other supported embeddings models](/docs/integrations/text_embedding) if you wish.\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "<IntegrationInstallTooltip></IntegrationInstallTooltip>\n", + "\n", + "<Npm2Yarn>\n", + " @langchain/community @supabase/supabase-js @langchain/openai\n", + "</Npm2Yarn>\n", + "```\n", + "\n", + "Once you've created a database, run the following SQL to set up [`pgvector`](https://github.com/pgvector/pgvector) and create the necessary table and functions:\n", + "\n", + "```sql\n", + "-- Enable the pgvector extension to work with embedding vectors\n", + "create extension vector;\n", + "\n", + "-- Create a table to store your documents\n", + "create table documents (\n", + " id bigserial primary key,\n", + " content text, -- corresponds to Document.pageContent\n", + " metadata jsonb, -- corresponds to Document.metadata\n", + " embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed\n", + ");\n", + "\n", + "-- Create a function to search for documents\n", + "create function match_documents (\n", + " query_embedding vector(1536),\n", + " match_count int DEFAULT null,\n", + " filter jsonb DEFAULT '{}'\n", + ") returns table (\n", + " id bigint,\n", + " content text,\n", + " metadata jsonb,\n", + " embedding jsonb,\n", + " similarity float\n", + ")\n", + "language plpgsql\n", + "as $$\n", + "#variable_conflict use_column\n", + "begin\n", + " return query\n", + " select\n", + " id,\n", + " content,\n", + " metadata,\n", + " (embedding::text)::jsonb as embedding,\n", + " 1 - (documents.embedding <=> query_embedding) as 
similarity\n", + " from documents\n", + " where metadata @> filter\n", + " order by documents.embedding <=> query_embedding\n", + " limit match_count;\n", + "end;\n", + "$$;\n", + "```\n", + "\n", + "### Credentials\n", + "\n", + "Once you've done this set the `SUPABASE_PRIVATE_KEY` and `SUPABASE_URL` environment variables:\n", + "\n", + "```typescript\n", + "process.env.SUPABASE_PRIVATE_KEY = \"your-api-key\";\n", + "process.env.SUPABASE_URL = \"your-supabase-db-url\";\n", + "```\n", + "\n", + "If you are using OpenAI embeddings for this guide, you'll need to set your OpenAI key as well:\n", + "\n", + "```typescript\n", + "process.env.OPENAI_API_KEY = \"YOUR_API_KEY\";\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGCHAIN_TRACING_V2=\"true\"\n", + "// process.env.LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "93df377e", + "metadata": {}, + "source": [ + "## Instantiation" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "dc37144c-208d-4ab3-9f3a-0407a69fe052", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import { SupabaseVectorStore } from \"@langchain/community/vectorstores/supabase\";\n", + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "\n", + "import { createClient } from \"@supabase/supabase-js\";\n", + "\n", + "const embeddings = new OpenAIEmbeddings({\n", + " model: \"text-embedding-3-small\",\n", + "});\n", + "\n", + "const supabaseClient = createClient(\n", + " process.env.SUPABASE_URL,\n", + " process.env.SUPABASE_PRIVATE_KEY\n", + ");\n", + "\n", + "const vectorStore = new SupabaseVectorStore(embeddings, {\n", + " client: supabaseClient,\n", + " tableName: \"documents\",\n", + " queryName: \"match_documents\",\n", + "});" + ] + }, + { + "cell_type": "markdown", + 
"id": "ac6071d4", + "metadata": {}, + "source": [ + "## Manage vector store\n", + "\n", + "### Add items to vector store" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "17f5efc0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 1, 2, 3, 4 ]\n" + ] + } + ], + "source": [ + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const document1: Document = {\n", + " pageContent: \"The powerhouse of the cell is the mitochondria\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document2: Document = {\n", + " pageContent: \"Buildings are made out of brick\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document3: Document = {\n", + " pageContent: \"Mitochondria are made out of lipids\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document4: Document = {\n", + " pageContent: \"The 2024 Olympics are in Paris\",\n", + " metadata: { source: \"https://example.com\" }\n", + "}\n", + "\n", + "const documents = [document1, document2, document3, document4];\n", + "\n", + "await vectorStore.addDocuments(documents, { ids: [\"1\", \"2\", \"3\", \"4\"] });" + ] + }, + { + "cell_type": "markdown", + "id": "dcf1b905", + "metadata": {}, + "source": [ + "### Delete items from vector store" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ef61e188", + "metadata": {}, + "outputs": [], + "source": [ + "await vectorStore.delete({ ids: [\"4\"] });" + ] + }, + { + "cell_type": "markdown", + "id": "c3620501", + "metadata": {}, + "source": [ + "## Query vector store\n", + "\n", + "Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. 
\n", + "\n", + "### Query directly\n", + "\n", + "Performing a simple similarity search can be done as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "aa0a16fa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* The powerhouse of the cell is the mitochondria [{\"source\":\"https://example.com\"}]\n", + "* Mitochondria are made out of lipids [{\"source\":\"https://example.com\"}]\n" + ] + } + ], + "source": [ + "const filter = { source: \"https://example.com\" };\n", + "\n", + "const similaritySearchResults = await vectorStore.similaritySearch(\"biology\", 2, filter);\n", + "\n", + "for (const doc of similaritySearchResults) {\n", + " console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "3ed9d733", + "metadata": {}, + "source": [ + "If you want to execute a similarity search and receive the corresponding scores you can run:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5efd2eaa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* [SIM=0.165] The powerhouse of the cell is the mitochondria [{\"source\":\"https://example.com\"}]\n", + "* [SIM=0.148] Mitochondria are made out of lipids [{\"source\":\"https://example.com\"}]\n" + ] + } + ], + "source": [ + "const similaritySearchWithScoreResults = await vectorStore.similaritySearchWithScore(\"biology\", 2, filter)\n", + "\n", + "for (const [doc, score] of similaritySearchWithScoreResults) {\n", + " console.log(`* [SIM=${score.toFixed(3)}] ${doc.pageContent} [${JSON.stringify(doc.metadata)}]`);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "180b0e66", + "metadata": {}, + "source": [ + "### Metadata Query Builder Filtering\n", + "\n", + "You can also use query builder-style filtering similar to how the [Supabase JavaScript 
library](https://supabase.com/docs/reference/javascript/using-filters) works instead of passing an object. Note that since most of the filter properties are in the metadata column, you need to use arrow operators (`->` for integer or `->>` for text) as defined in the [PostgREST API documentation](https://postgrest.org/en/stable/references/api/tables_views.html#json-columns) and specify the data type of the property (e.g. the column should look something like `metadata->some_int_prop_name::int`)." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e3287768", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* The powerhouse of the cell is the mitochondria [{\"source\":\"https://example.com\"}]\n", + "* Mitochondria are made out of lipids [{\"source\":\"https://example.com\"}]\n" + ] + } + ], + "source": [ + "import { SupabaseFilterRPCCall } from \"@langchain/community/vectorstores/supabase\";\n", + "\n", + "const funcFilter: SupabaseFilterRPCCall = (rpc) =>\n", + " rpc.filter(\"metadata->>source\", \"eq\", \"https://example.com\");\n", + "\n", + "const funcFilterSearchResults = await vectorStore.similaritySearch(\"biology\", 2, funcFilter);\n", + "\n", + "for (const doc of funcFilterSearchResults) {\n", + " console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "0c235cdc", + "metadata": {}, + "source": [ + "### Query by turning into retriever\n", + "\n", + "You can also transform the vector store into a [retriever](/docs/concepts/#retrievers) for easier usage in your chains. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f3460093", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: 'The powerhouse of the cell is the mitochondria',\n", + " metadata: { source: 'https://example.com' },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Mitochondria are made out of lipids',\n", + " metadata: { source: 'https://example.com' },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "const retriever = vectorStore.asRetriever({\n", + " // Optional filter\n", + " filter: filter,\n", + " k: 2,\n", + "});\n", + "await retriever.invoke(\"biology\");" + ] + }, + { + "cell_type": "markdown", + "id": "e2e0a211", + "metadata": {}, + "source": [ + "### Usage for retrieval-augmented generation\n", + "\n", + "For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n", + "\n", + "- [Tutorials: working with external knowledge](/docs/tutorials/#working-with-external-knowledge).\n", + "- [How-to: Question and answer with RAG](/docs/how_to/#qa-with-rag)\n", + "- [Retrieval conceptual docs](/docs/concepts#retrieval)" + ] + }, + { + "cell_type": "markdown", + "id": "8a27244f", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all `SupabaseVectorStore` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_community_vectorstores_supabase.SupabaseVectorStore.html)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/vectorstores/supabase.mdx b/docs/core_docs/docs/integrations/vectorstores/supabase.mdx deleted file mode 100644 index 86583e975953..000000000000 --- a/docs/core_docs/docs/integrations/vectorstores/supabase.mdx +++ /dev/null @@ -1,112 +0,0 @@ -# Supabase - -Langchain supports using Supabase Postgres database as a vector store, using the `pgvector` postgres extension. Refer to the [Supabase blog post](https://supabase.com/blog/openai-embeddings-postgres-vector) for more information. - -## Setup - -### Install the library with - -```bash npm2yarn -npm install -S @supabase/supabase-js -``` - -### Create a table and search function in your database - -Run this in your database: - -```sql --- Enable the pgvector extension to work with embedding vectors -create extension vector; - --- Create a table to store your documents -create table documents ( - id bigserial primary key, - content text, -- corresponds to Document.pageContent - metadata jsonb, -- corresponds to Document.metadata - embedding vector(1536) -- 1536 works for OpenAI embeddings, change if needed -); - --- Create a function to search for documents -create function match_documents ( - query_embedding vector(1536), - match_count int DEFAULT null, - filter jsonb DEFAULT '{}' -) returns table ( - id bigint, - content text, - metadata jsonb, - embedding jsonb, - similarity float -) -language plpgsql -as $$ -#variable_conflict use_column -begin - return query - select - id, - content, - metadata, - (embedding::text)::jsonb as embedding, - 1 - (documents.embedding <=> 
query_embedding) as similarity - from documents - where metadata @> filter - order by documents.embedding <=> query_embedding - limit match_count; -end; -$$; -``` - -## Usage - -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/openai @langchain/community -``` - -import CodeBlock from "@theme/CodeBlock"; -import Example from "@examples/indexes/vector_stores/supabase.ts"; -import MetadataFilterExample from "@examples/indexes/vector_stores/supabase_with_metadata_filter.ts"; -import MetadataQueryBuilderFilterExample from "@examples/indexes/vector_stores/supabase_with_query_builder_metadata_filter.ts"; -import MaximumMarginalRelevanceExample from "@examples/indexes/vector_stores/supabase_with_maximum_marginal_relevance.ts"; -import DeletionExample from "@examples/indexes/vector_stores/supabase_deletion.ts"; - -### Standard Usage - -The below example shows how to perform a basic similarity search with Supabase: - -{Example} - -### Metadata Filtering - -Given the above `match_documents` Postgres function, you can also pass a filter parameter to only documents with a specific metadata field value. This filter parameter is a JSON object, and the `match_documents` function will use the Postgres JSONB Containment operator `@>` to filter documents by the metadata field values you specify. See details on the [Postgres JSONB Containment operator](https://www.postgresql.org/docs/current/datatype-json.html#JSON-CONTAINMENT) for more information. - -**Note:** If you've previously been using `SupabaseVectorStore`, you may need to drop and recreate the `match_documents` function per the updated SQL above to use this functionality. - -{MetadataFilterExample} - -### Metadata Query Builder Filtering - -You can also use query builder-style filtering similar to how [the Supabase JavaScript library works](https://supabase.com/docs/reference/javascript/using-filters) instead of passing an object. 
Note that since most of the filter properties are in the metadata column, you need to use arrow operators (`->` for integer or `->>` for text) as defined in [Postgrest API documentation](https://postgrest.org/en/stable/references/api/tables_views.html?highlight=operators#json-columns) and specify the data type of the property (e.g. the column should look something like `metadata->some_int_value::int`). - -{MetadataQueryBuilderFilterExample} - -### Maximal marginal relevance - -You can use maximal marginal relevance search, which optimizes for similarity to the query AND diversity. - -**Note:** If you've previously been using `SupabaseVectorStore`, you may need to drop and recreate the `match_documents` function per the updated SQL above to use this functionality. - -{MaximumMarginalRelevanceExample} - -### Document deletion - -{DeletionExample} - -## Related - -- Vector store [conceptual guide](/docs/concepts/#vectorstores) -- Vector store [how-to guides](/docs/how_to/#vectorstores) diff --git a/docs/core_docs/docs/integrations/vectorstores/upstash.ipynb b/docs/core_docs/docs/integrations/vectorstores/upstash.ipynb new file mode 100644 index 000000000000..15c1d8b82ecf --- /dev/null +++ b/docs/core_docs/docs/integrations/vectorstores/upstash.ipynb @@ -0,0 +1,363 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "1957f5cb", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Upstash Vector\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "ef1f0986", + "metadata": {}, + "source": [ + "# UpstashVectorStore\n", + "\n", + "[Upstash Vector](https://upstash.com/) is a REST based serverless vector database, designed for working with vector embeddings.\n", + "\n", + "This guide provides a quick overview for getting started with Upstash [vector stores](/docs/concepts/#vectorstores). 
For detailed documentation of all `UpstashVectorStore` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_community_vectorstores_upstash.UpstashVectorStore.html)." + ] + }, + { + "cell_type": "markdown", + "id": "c824838d", + "metadata": {}, + "source": [ + "## Overview\n", + "\n", + "### Integration details\n", + "\n", + "| Class | Package | [PY support](https://python.langchain.com/v0.2/docs/integrations/vectorstores/upstash/) | Package latest |\n", + "| :--- | :--- | :---: | :---: |\n", + "| [`UpstashVectorStore`](https://api.js.langchain.com/classes/langchain_community_vectorstores_upstash.UpstashVectorStore.html) | [`@langchain/community`](https://npmjs.com/@langchain/community) | ✅ | ![NPM - Version](https://img.shields.io/npm/v/@langchain/community?style=flat-square&label=%20&) |" + ] + }, + { + "cell_type": "markdown", + "id": "36fdc060", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "To use Upstash vector stores, you'll need to create an Upstash account, create an index, and install the `@langchain/community` integration package. You'll also need to install the [`@upstash/vector`](https://www.npmjs.com/package/@upstash/vector) package as a peer dependency.\n", + "\n", + "This guide will also use [OpenAI embeddings](/docs/integrations/text_embedding/openai), which require you to install the `@langchain/openai` integration package. You can also use [other supported embeddings models](/docs/integrations/text_embedding) if you wish.\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "<IntegrationInstallTooltip></IntegrationInstallTooltip>\n", + "\n", + "<Npm2Yarn>\n", + " @langchain/community @upstash/vector @langchain/openai\n", + "</Npm2Yarn>\n", + "```\n", + "\n", + "You can create an index from the [Upstash Console](https://console.upstash.com/login). 
For further reference, see [the official docs](https://upstash.com/docs/vector/overall/getstarted).\n", + "\n", + "### Credentials\n", + "\n", + "Once you've set up an index, set the following environment variables:\n", + "\n", + "```typescript\n", + "process.env.UPSTASH_VECTOR_REST_URL = \"your-rest-url\";\n", + "process.env.UPSTASH_VECTOR_REST_TOKEN = \"your-rest-token\";\n", + "```\n", + "\n", + "If you are using OpenAI embeddings for this guide, you'll need to set your OpenAI key as well:\n", + "\n", + "```typescript\n", + "process.env.OPENAI_API_KEY = \"YOUR_API_KEY\";\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGCHAIN_TRACING_V2=\"true\"\n", + "// process.env.LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "93df377e", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Make sure your index has the same dimension count as your embeddings. The default for OpenAI `text-embedding-3-small` is 1536." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "dc37144c-208d-4ab3-9f3a-0407a69fe052", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import { UpstashVectorStore } from \"@langchain/community/vectorstores/upstash\";\n", + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "\n", + "import { Index } from \"@upstash/vector\";\n", + "\n", + "const embeddings = new OpenAIEmbeddings({\n", + " model: \"text-embedding-3-small\",\n", + "});\n", + "\n", + "const indexWithCredentials = new Index({\n", + " url: process.env.UPSTASH_VECTOR_REST_URL,\n", + " token: process.env.UPSTASH_VECTOR_REST_TOKEN,\n", + "});\n", + "\n", + "const vectorStore = new UpstashVectorStore(embeddings, {\n", + " index: indexWithCredentials,\n", + " // You can use namespaces to partition your data in an index\n", + " // namespace: \"test-namespace\",\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "ac6071d4", + "metadata": {}, + "source": [ + "## Manage vector store\n", + "\n", + "### Add items to vector store" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "17f5efc0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ '1', '2', '3', '4' ]\n" + ] + } + ], + "source": [ + "import type { Document } from \"@langchain/core/documents\";\n", + "\n", + "const document1: Document = {\n", + " pageContent: \"The powerhouse of the cell is the mitochondria\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document2: Document = {\n", + " pageContent: \"Buildings are made out of brick\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document3: Document = {\n", + " pageContent: \"Mitochondria are made out of lipids\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document4: Document = {\n", + " pageContent: \"The 2024 Olympics are in Paris\",\n", + " metadata: { 
source: \"https://example.com\" }\n", + "}\n", + "\n", + "const documents = [document1, document2, document3, document4];\n", + "\n", + "await vectorStore.addDocuments(documents, { ids: [\"1\", \"2\", \"3\", \"4\"] });" + ] + }, + { + "cell_type": "markdown", + "id": "dcf1b905", + "metadata": {}, + "source": [ + "**Note:** After adding documents, there may be a slight delay before they become queryable.\n", + "\n", + "### Delete items from vector store" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ef61e188", + "metadata": {}, + "outputs": [], + "source": [ + "await vectorStore.delete({ ids: [\"4\"] });" + ] + }, + { + "cell_type": "markdown", + "id": "c3620501", + "metadata": {}, + "source": [ + "## Query vector store\n", + "\n", + "Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n", + "\n", + "### Query directly\n", + "\n", + "Performing a simple similarity search can be done as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "aa0a16fa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* The powerhouse of the cell is the mitochondria [{\"source\":\"https://example.com\"}]\n", + "* Mitochondria are made out of lipids [{\"source\":\"https://example.com\"}]\n" + ] + } + ], + "source": [ + "const filter = \"source = 'https://example.com'\";\n", + "\n", + "const similaritySearchResults = await vectorStore.similaritySearch(\"biology\", 2, filter);\n", + "\n", + "for (const doc of similaritySearchResults) {\n", + " console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "3ed9d733", + "metadata": {}, + "source": [ + "See [this page](https://upstash.com/docs/vector/features/filtering) for more on Upstash Vector filter syntax.\n", + "\n", + "If you want to execute a similarity 
search and receive the corresponding scores you can run:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5efd2eaa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* [SIM=0.576] The powerhouse of the cell is the mitochondria [{\"source\":\"https://example.com\"}]\n", + "* [SIM=0.557] Mitochondria are made out of lipids [{\"source\":\"https://example.com\"}]\n" + ] + } + ], + "source": [ + "const similaritySearchWithScoreResults = await vectorStore.similaritySearchWithScore(\"biology\", 2, filter)\n", + "\n", + "for (const [doc, score] of similaritySearchWithScoreResults) {\n", + " console.log(`* [SIM=${score.toFixed(3)}] ${doc.pageContent} [${JSON.stringify(doc.metadata)}]`);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "0c235cdc", + "metadata": {}, + "source": [ + "### Query by turning into retriever\n", + "\n", + "You can also transform the vector store into a [retriever](/docs/concepts/#retrievers) for easier usage in your chains. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f3460093", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: 'The powerhouse of the cell is the mitochondria',\n", + " metadata: { source: 'https://example.com' },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Mitochondria are made out of lipids',\n", + " metadata: { source: 'https://example.com' },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "const retriever = vectorStore.asRetriever({\n", + " // Optional filter\n", + " filter: filter,\n", + " k: 2,\n", + "});\n", + "await retriever.invoke(\"biology\");" + ] + }, + { + "cell_type": "markdown", + "id": "e2e0a211", + "metadata": {}, + "source": [ + "### Usage for retrieval-augmented generation\n", + "\n", + "For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n", + "\n", + "- [Tutorials: working with external knowledge](/docs/tutorials/#working-with-external-knowledge).\n", + "- [How-to: Question and answer with RAG](/docs/how_to/#qa-with-rag)\n", + "- [Retrieval conceptual docs](/docs/concepts#retrieval)" + ] + }, + { + "cell_type": "markdown", + "id": "8a27244f", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all `UpstashVectorStore` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_community_vectorstores_upstash.UpstashVectorStore.html)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/vectorstores/upstash.mdx b/docs/core_docs/docs/integrations/vectorstores/upstash.mdx deleted file mode 100644 index 8327da74daaf..000000000000 --- a/docs/core_docs/docs/integrations/vectorstores/upstash.mdx +++ /dev/null @@ -1,67 +0,0 @@ -import CodeBlock from "@theme/CodeBlock"; -import CreateClientExample from "@examples/indexes/vector_stores/upstash/create_client.ts"; -import IndexQueryExample from "@examples/indexes/vector_stores/upstash/index_and_query_docs.ts"; -import DeleteExample from "@examples/indexes/vector_stores/upstash/delete_docs.ts"; -import UpstashEmbeddingsExample from "@examples/indexes/vector_stores/upstash/upstash_embeddings.ts"; -import NamespaceExample from "@examples/indexes/vector_stores/upstash/namespaces.ts"; -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - -# Upstash Vector - -Upstash Vector is a REST based serverless vector database, designed for working with vector embeddings. - -## Setup - -1. Create Upstash Vector Index - -You can create an index from [Upstash Console](https://console.upstash.com/vector). For further reference, see [docs](https://upstash.com/docs/vector/overall/getstarted). - -2. Install Upstash Vector SDK. - -```bash npm2yarn -npm install -S @upstash/vector -``` - -We use OpenAI for the embeddings of the below examples. -However, you can also create the embeddings using the model of your choice, that is available in the LangChain. 
- - - -```bash npm2yarn -npm install @langchain/openai @langchain/community -``` - -## Create Upstash Vector Client - -There are two ways to create the client. You can either pass the credentials as string manually from the `.env` file (or as string variables), or you can retrieve the credentials from the environment automatically. - -{CreateClientExample} - -## Index and Query Documents - -You can index the LangChain documents with any model of your choice, and perform a search over these documents. It's possible to apply metadata filtering to the search results. See [the related docs here](https://upstash.com/docs/vector/features/filtering). - -{IndexQueryExample} - -## Namespaces - -You can use namespaces to partition your data in the index. Namespaces are useful when you want to query over huge amount of data, and you want to partition the data to make the queries faster. When you use namespaces, there won't be post-filtering on the results which will make the query results more precise. - -{NamespaceExample} - -## Upstash embeddings - -It's possible to use the embeddings service of Upstash, which is based on the embedding model of choice when creating the vector database. You don't need to create the embeddings manually, as the Upstash Vector service will handle this for you. - -{UpstashEmbeddingsExample} - -## Delete Documents - -You can also delete the documents you've indexed previously. 
- -{DeleteExample} - -## Related - -- Vector store [conceptual guide](/docs/concepts/#vectorstores) -- Vector store [how-to guides](/docs/how_to/#vectorstores) diff --git a/docs/core_docs/docs/integrations/vectorstores/weaviate.ipynb b/docs/core_docs/docs/integrations/vectorstores/weaviate.ipynb new file mode 100644 index 000000000000..d9e0e41820b7 --- /dev/null +++ b/docs/core_docs/docs/integrations/vectorstores/weaviate.ipynb @@ -0,0 +1,387 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "1957f5cb", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Weaviate\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "ef1f0986", + "metadata": {}, + "source": [ + "# WeaviateStore\n", + "\n", + "[Weaviate](https://weaviate.io/) is an open source vector database that stores both objects and vectors, allowing for combining vector search with structured filtering. LangChain connects to Weaviate via the `weaviate-ts-client` package, the official TypeScript client for Weaviate.\n", + "\n", + "This guide provides a quick overview for getting started with Weaviate [vector stores](/docs/concepts/#vectorstores). For detailed documentation of all `WeaviateStore` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_weaviate.WeaviateStore.html)."
+ ] + }, + { + "cell_type": "markdown", + "id": "c824838d", + "metadata": {}, + "source": [ + "## Overview\n", + "\n", + "### Integration details\n", + "\n", + "| Class | Package | [PY support](https://python.langchain.com/v0.2/docs/integrations/vectorstores/weaviate/) | Package latest |\n", + "| :--- | :--- | :---: | :---: |\n", + "| [`WeaviateStore`](https://api.js.langchain.com/classes/langchain_weaviate.WeaviateStore.html) | [`@langchain/weaviate`](https://npmjs.com/@langchain/weaviate) | ✅ | ![NPM - Version](https://img.shields.io/npm/v/@langchain/weaviate?style=flat-square&label=%20&) |" + ] + }, + { + "cell_type": "markdown", + "id": "36fdc060", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "To use Weaviate vector stores, you'll need to set up a Weaviate instance and install the `@langchain/weaviate` integration package. You should also install the `weaviate-ts-client` package to initialize a client to connect to your instance with, and the `uuid` package if you want to assign indexed documents ids.\n", + "\n", + "This guide will also use [OpenAI embeddings](/docs/integrations/text_embedding/openai), which require you to install the `@langchain/openai` integration package. You can also use [other supported embeddings models](/docs/integrations/text_embedding) if you wish.\n", + "\n", + "```{=mdx}\n", + "import IntegrationInstallTooltip from \"@mdx_components/integration_install_tooltip.mdx\";\n", + "import Npm2Yarn from \"@theme/Npm2Yarn\";\n", + "\n", + "\n", + "\n", + "\n", + " @langchain/weaviate weaviate-ts-client uuid @langchain/openai\n", + "\n", + "```\n", + "\n", + "You'll need to run Weaviate either locally or on a server. 
See [the Weaviate documentation](https://weaviate.io/developers/weaviate/installation) for more information.\n", + "\n", + "### Credentials\n", + "\n", + "Once you've set up your instance, set the following environment variables:\n", + "\n", + "```typescript\n", + "// http or https\n", + "process.env.WEAVIATE_SCHEME = \"\";\n", + "// If running locally, include port e.g. \"localhost:8080\"\n", + "process.env.WEAVIATE_HOST = \"YOUR_HOSTNAME\";\n", + "// Optional, for cloud deployments\n", + "process.env.WEAVIATE_API_KEY = \"YOUR_API_KEY\";\n", + "```\n", + "\n", + "If you are using OpenAI embeddings for this guide, you'll need to set your OpenAI key as well:\n", + "\n", + "```typescript\n", + "process.env.OPENAI_API_KEY = \"YOUR_API_KEY\";\n", + "```\n", + "\n", + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:\n", + "\n", + "```typescript\n", + "// process.env.LANGCHAIN_TRACING_V2=\"true\"\n", + "// process.env.LANGCHAIN_API_KEY=\"your-api-key\"\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "93df377e", + "metadata": {}, + "source": [ + "## Instantiation" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "dc37144c-208d-4ab3-9f3a-0407a69fe052", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import { WeaviateStore } from \"@langchain/weaviate\";\n", + "import { OpenAIEmbeddings } from \"@langchain/openai\";\n", + "\n", + "import weaviate from \"weaviate-ts-client\";\n", + "// import { ApiKey } from \"weaviate-ts-client\"\n", + "\n", + "const embeddings = new OpenAIEmbeddings({\n", + " model: \"text-embedding-3-small\",\n", + "});\n", + "\n", + "const weaviateClient = weaviate.client({\n", + " scheme: process.env.WEAVIATE_SCHEME ?? \"http\",\n", + " host: process.env.WEAVIATE_HOST ?? \"localhost\",\n", + " // If necessary\n", + " // apiKey: new ApiKey(process.env.WEAVIATE_API_KEY ?? 
\"default\"),\n", + "});\n", + "\n", + "const vectorStore = new WeaviateStore(embeddings, {\n", + " client: weaviateClient,\n", + " // Must start with a capital letter\n", + " indexName: \"Langchainjs_test\",\n", + " // Default value\n", + " textKey: \"text\",\n", + " // Any keys you intend to set as metadata\n", + " metadataKeys: [\"source\"],\n", + "});" + ] + }, + { + "cell_type": "markdown", + "id": "ac6071d4", + "metadata": {}, + "source": [ + "## Manage vector store\n", + "\n", + "### Add items to vector store\n", + "\n", + "**Note:** If you want to associate ids with your indexed documents, they must be UUIDs." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "17f5efc0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " '610f9b92-9bee-473f-a4db-8f2ca6e3442d',\n", + " '995160fa-441e-41a0-b476-cf3785518a0d',\n", + " '0cdbe6d4-0df8-4f99-9b67-184009fee9a2',\n", + " '18a8211c-0649-467b-a7c5-50ebb4b9ca9d'\n", + "]\n" + ] + } + ], + "source": [ + "import type { Document } from \"@langchain/core/documents\";\n", + "import { v4 as uuidv4 } from \"uuid\";\n", + "\n", + "const document1: Document = {\n", + " pageContent: \"The powerhouse of the cell is the mitochondria\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document2: Document = {\n", + " pageContent: \"Buildings are made out of brick\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document3: Document = {\n", + " pageContent: \"Mitochondria are made out of lipids\",\n", + " metadata: { source: \"https://example.com\" }\n", + "};\n", + "\n", + "const document4: Document = {\n", + " pageContent: \"The 2024 Olympics are in Paris\",\n", + " metadata: { source: \"https://example.com\" }\n", + "}\n", + "\n", + "const documents = [document1, document2, document3, document4];\n", + "const uuids = [uuidv4(), uuidv4(), uuidv4(), uuidv4()];\n", + "\n", + "await 
vectorStore.addDocuments(documents, { ids: uuids });" + ] + }, + { + "cell_type": "markdown", + "id": "dcf1b905", + "metadata": {}, + "source": [ + "### Delete items from vector store\n", + "\n", + "You can delete by id or by passing a `filter` param:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ef61e188", + "metadata": {}, + "outputs": [], + "source": [ + "await vectorStore.delete({ ids: [uuids[3]] });" + ] + }, + { + "cell_type": "markdown", + "id": "c3620501", + "metadata": {}, + "source": [ + "## Query vector store\n", + "\n", + "Once your vector store has been created and the relevant documents have been added you will most likely wish to query it during the running of your chain or agent. \n", + "\n", + "### Query directly\n", + "\n", + "Performing a simple similarity search can be done as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "aa0a16fa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* The powerhouse of the cell is the mitochondria [{\"source\":\"https://example.com\"}]\n", + "* Mitochondria are made out of lipids [{\"source\":\"https://example.com\"}]\n" + ] + } + ], + "source": [ + "const filter = {\n", + " where: {\n", + " operator: \"Equal\" as const,\n", + " path: [\"source\"],\n", + " valueText: \"https://example.com\",\n", + " }\n", + "};\n", + "\n", + "const similaritySearchResults = await vectorStore.similaritySearch(\"biology\", 2, filter);\n", + "\n", + "for (const doc of similaritySearchResults) {\n", + " console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "3ed9d733", + "metadata": {}, + "source": [ + "See [this page](https://weaviate.io/developers/weaviate/api/graphql/filters) for more on Weaviate filter syntax.\n", + "\n", + "If you want to execute a similarity search and receive the corresponding scores you can run:" + ] + }, + { + "cell_type":
"code", + "execution_count": 15, + "id": "5efd2eaa", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* [SIM=0.835] The powerhouse of the cell is the mitochondria [{\"source\":\"https://example.com\"}]\n", + "* [SIM=0.852] Mitochondria are made out of lipids [{\"source\":\"https://example.com\"}]\n" + ] + } + ], + "source": [ + "const similaritySearchWithScoreResults = await vectorStore.similaritySearchWithScore(\"biology\", 2, filter)\n", + "\n", + "for (const [doc, score] of similaritySearchWithScoreResults) {\n", + " console.log(`* [SIM=${score.toFixed(3)}] ${doc.pageContent} [${JSON.stringify(doc.metadata)}]`);\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "0c235cdc", + "metadata": {}, + "source": [ + "### Query by turning into retriever\n", + "\n", + "You can also transform the vector store into a [retriever](/docs/concepts/#retrievers) for easier usage in your chains. " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "f3460093", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\n", + " Document {\n", + " pageContent: 'The powerhouse of the cell is the mitochondria',\n", + " metadata: { source: 'https://example.com' },\n", + " id: undefined\n", + " },\n", + " Document {\n", + " pageContent: 'Mitochondria are made out of lipids',\n", + " metadata: { source: 'https://example.com' },\n", + " id: undefined\n", + " }\n", + "]\n" + ] + } + ], + "source": [ + "const retriever = vectorStore.asRetriever({\n", + " // Optional filter\n", + " filter: filter,\n", + " k: 2,\n", + "});\n", + "await retriever.invoke(\"biology\");" + ] + }, + { + "cell_type": "markdown", + "id": "e2e0a211", + "metadata": {}, + "source": [ + "### Usage for retrieval-augmented generation\n", + "\n", + "For guides on how to use this vector store for retrieval-augmented generation (RAG), see the following sections:\n", + "\n", + "- [Tutorials: working with external 
knowledge](/docs/tutorials/#working-with-external-knowledge).\n", + "- [How-to: Question and answer with RAG](/docs/how_to/#qa-with-rag)\n", + "- [Retrieval conceptual docs](/docs/concepts#retrieval)" + ] + }, + { + "cell_type": "markdown", + "id": "8a27244f", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all `WeaviateStore` features and configurations head to the [API reference](https://api.js.langchain.com/classes/langchain_weaviate.WeaviateStore.html)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "TypeScript", + "language": "typescript", + "name": "tslab" + }, + "language_info": { + "codemirror_mode": { + "mode": "typescript", + "name": "javascript", + "typescript": true + }, + "file_extension": ".ts", + "mimetype": "text/typescript", + "name": "typescript", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/core_docs/docs/integrations/vectorstores/weaviate.mdx b/docs/core_docs/docs/integrations/vectorstores/weaviate.mdx deleted file mode 100644 index 0363e3fdd63e..000000000000 --- a/docs/core_docs/docs/integrations/vectorstores/weaviate.mdx +++ /dev/null @@ -1,57 +0,0 @@ ---- -hide_table_of_contents: true ---- - -import CodeBlock from "@theme/CodeBlock"; - -# Weaviate - -Weaviate is an open source vector database that stores both objects and vectors, allowing for combining vector search with structured filtering. -LangChain connects to Weaviate via the `weaviate-ts-client` package, the official Typescript client for Weaviate. - -LangChain inserts vectors directly to Weaviate, and queries Weaviate for the nearest neighbors of a given vector, so that you can use all the LangChain Embeddings integrations with Weaviate. - -## Setup - -Weaviate has their own standalone integration package with LangChain, accessible via [`@langchain/weaviate`](https://www.npmjs.com/package/@langchain/weaviate) on NPM! 
- -import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; - - - -```bash npm2yarn -npm install @langchain/weaviate @langchain/openai @langchain/community -``` - -You'll need to run Weaviate either locally or on a server, see [the Weaviate documentation](https://weaviate.io/developers/weaviate/installation) for more information. - -## Usage, insert documents - -import InsertExample from "@examples/indexes/vector_stores/weaviate_fromTexts.ts"; - -{InsertExample} - -## Usage, query documents - -import QueryExample from "@examples/indexes/vector_stores/weaviate_search.ts"; - -{QueryExample} - -## Usage, maximal marginal relevance - -import MaximumMarginalRelevanceExample from "@examples/indexes/vector_stores/weaviate_mmr.ts"; - -You can use maximal marginal relevance search, which optimizes for similarity to the query AND diversity. - -{MaximumMarginalRelevanceExample} - -## Usage, delete documents - -import DeleteExample from "@examples/indexes/vector_stores/weaviate_delete.ts"; - -{DeleteExample} - -## Related - -- Vector store [conceptual guide](/docs/concepts/#vectorstores) -- Vector store [how-to guides](/docs/how_to/#vectorstores) diff --git a/libs/langchain-scripts/src/cli/docs/templates/vectorstores.ipynb b/libs/langchain-scripts/src/cli/docs/templates/vectorstores.ipynb index bd9e31791e20..c5c6c369b784 100644 --- a/libs/langchain-scripts/src/cli/docs/templates/vectorstores.ipynb +++ b/libs/langchain-scripts/src/cli/docs/templates/vectorstores.ipynb @@ -23,7 +23,7 @@ "\n", "- TODO: Add any other relevant links, like information about underlying API, etc.\n", "\n", - "This guide provides a quick overview for getting started with [`__module_name__`](/docs/concepts/#vectorstores). For detailed documentation of all `__module_name__` features and configurations head to the [API reference](__api_ref_module__)." 
+ "This guide provides a quick overview for getting started with __sidebar_label__ [vector stores](/docs/concepts/#vectorstores). For detailed documentation of all `__module_name__` features and configurations head to the [API reference](__api_ref_module__)." ] }, { @@ -39,7 +39,7 @@ "\n", "| Class | Package | [PY support](__python_doc_url__) | Package latest |\n", "| :--- | :--- | :---: | :---: |\n", - "| [__module_name__](__api_ref_module__) | [__package_name__](__api_ref_package__) | __py_support__ | ![NPM - Version](https://img.shields.io/npm/v/__package_name__?style=flat-square&label=%20&) |" + "| [`__module_name__`](__api_ref_module__) | [`__package_name__`](https://npmjs.com/__package_name__) | __py_support__ | ![NPM - Version](https://img.shields.io/npm/v/__package_name__?style=flat-square&label=%20&) |" ] }, { From 2ec1028e435c989c7c24d0ca022c798f36fde845 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Wed, 7 Aug 2024 12:25:29 -0700 Subject: [PATCH 4/4] Fix --- docs/core_docs/docs/integrations/vectorstores/weaviate.ipynb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/core_docs/docs/integrations/vectorstores/weaviate.ipynb b/docs/core_docs/docs/integrations/vectorstores/weaviate.ipynb index d9e0e41820b7..bacdc87c92fb 100644 --- a/docs/core_docs/docs/integrations/vectorstores/weaviate.ipynb +++ b/docs/core_docs/docs/integrations/vectorstores/weaviate.ipynb @@ -118,7 +118,8 @@ " model: \"text-embedding-3-small\",\n", "});\n", "\n", - "const weaviateClient = weaviate.client({\n", + "// The Weaviate SDK has an issue with types\n", + "const weaviateClient = (weaviate as any).client({\n", " scheme: process.env.WEAVIATE_SCHEME ?? \"http\",\n", " host: process.env.WEAVIATE_HOST ?? \"localhost\",\n", " // If necessary\n",