diff --git a/docs/core_docs/.gitignore b/docs/core_docs/.gitignore
index 3b2f9ca94e78..e1e4c5360d03 100644
--- a/docs/core_docs/.gitignore
+++ b/docs/core_docs/.gitignore
@@ -192,6 +192,8 @@ docs/how_to/chat_streaming.md
docs/how_to/chat_streaming.mdx
docs/how_to/character_text_splitter.md
docs/how_to/character_text_splitter.mdx
+docs/how_to/cancel_execution.md
+docs/how_to/cancel_execution.mdx
docs/how_to/callbacks_runtime.md
docs/how_to/callbacks_runtime.mdx
docs/how_to/callbacks_custom_events.md
@@ -208,8 +210,32 @@ docs/how_to/assign.md
docs/how_to/assign.mdx
docs/how_to/agent_executor.md
docs/how_to/agent_executor.mdx
+docs/integrations/text_embedding/togetherai.md
+docs/integrations/text_embedding/togetherai.mdx
+docs/integrations/text_embedding/openai.md
+docs/integrations/text_embedding/openai.mdx
+docs/integrations/text_embedding/azure_openai.md
+docs/integrations/text_embedding/azure_openai.mdx
+docs/integrations/retrievers/exa.md
+docs/integrations/retrievers/exa.mdx
+docs/integrations/retrievers/bedrock-knowledge-bases.md
+docs/integrations/retrievers/bedrock-knowledge-bases.mdx
+docs/integrations/llms/openai.md
+docs/integrations/llms/openai.mdx
+docs/integrations/llms/mistralai.md
+docs/integrations/llms/mistralai.mdx
docs/integrations/llms/mistral.md
docs/integrations/llms/mistral.mdx
+docs/integrations/llms/google_vertex_ai.md
+docs/integrations/llms/google_vertex_ai.mdx
+docs/integrations/llms/fireworks.md
+docs/integrations/llms/fireworks.mdx
+docs/integrations/llms/cohere.md
+docs/integrations/llms/cohere.mdx
+docs/integrations/llms/bedrock.md
+docs/integrations/llms/bedrock.mdx
+docs/integrations/llms/azure.md
+docs/integrations/llms/azure.mdx
docs/integrations/chat/togetherai.md
docs/integrations/chat/togetherai.mdx
docs/integrations/chat/openai.md
@@ -232,5 +258,23 @@ docs/integrations/chat/azure.md
docs/integrations/chat/azure.mdx
docs/integrations/chat/anthropic.md
docs/integrations/chat/anthropic.mdx
+docs/integrations/document_loaders/web_loaders/web_puppeteer.md
+docs/integrations/document_loaders/web_loaders/web_puppeteer.mdx
docs/integrations/document_loaders/web_loaders/web_cheerio.md
-docs/integrations/document_loaders/web_loaders/web_cheerio.mdx
\ No newline at end of file
+docs/integrations/document_loaders/web_loaders/web_cheerio.mdx
+docs/integrations/document_loaders/web_loaders/recursive_url_loader.md
+docs/integrations/document_loaders/web_loaders/recursive_url_loader.mdx
+docs/integrations/document_loaders/web_loaders/pdf.md
+docs/integrations/document_loaders/web_loaders/pdf.mdx
+docs/integrations/document_loaders/web_loaders/firecrawl.md
+docs/integrations/document_loaders/web_loaders/firecrawl.mdx
+docs/integrations/document_loaders/file_loaders/unstructured.md
+docs/integrations/document_loaders/file_loaders/unstructured.mdx
+docs/integrations/document_loaders/file_loaders/text.md
+docs/integrations/document_loaders/file_loaders/text.mdx
+docs/integrations/document_loaders/file_loaders/pdf.md
+docs/integrations/document_loaders/file_loaders/pdf.mdx
+docs/integrations/document_loaders/file_loaders/directory.md
+docs/integrations/document_loaders/file_loaders/directory.mdx
+docs/integrations/document_loaders/file_loaders/csv.md
+docs/integrations/document_loaders/file_loaders/csv.mdx
\ No newline at end of file
diff --git a/docs/core_docs/docs/how_to/cancel_execution.ipynb b/docs/core_docs/docs/how_to/cancel_execution.ipynb
new file mode 100644
index 000000000000..6f35150492ee
--- /dev/null
+++ b/docs/core_docs/docs/how_to/cancel_execution.ipynb
@@ -0,0 +1,297 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# How to cancel execution\n",
+ "\n",
+ "```{=mdx}\n",
+ ":::info Prerequisites\n",
+ "\n",
+ "This guide assumes familiarity with the following concepts:\n",
+ "\n",
+ "- [LangChain Expression Language](/docs/concepts/#langchain-expression-language)\n",
+ "- [Chains](/docs/how_to/sequence/)\n",
+ "- [Streaming](/docs/how_to/streaming/)\n",
+ "\n",
+ ":::\n",
+ "```\n",
+ "\n",
+ "When building longer-running chains or [LangGraph](https://langchain-ai.github.io/langgraphjs/) agents, you may want to interrupt execution in situations such as a user leaving your app or submitting a new query.\n",
+ "\n",
+ "[LangChain Expression Language (LCEL)](/docs/concepts#langchain-expression-language) supports aborting runnables that are in-progress via a runtime [signal](https://developer.mozilla.org/en-US/docs/Web/API/AbortController/signal) option.\n",
+ "\n",
+ "```{=mdx}\n",
+ ":::caution Compatibility\n",
+ "\n",
+ "Built-in signal support requires `@langchain/core>=0.2.20`. Please see here for a [guide on upgrading](/docs/how_to/installation/#installing-integration-packages).\n",
+ "\n",
+ ":::\n",
+ "```\n",
+ "\n",
+ "**Note:** Individual integrations like chat models or retrievers may have missing or differing implementations for aborting execution. Signal support as described in this guide will apply in between steps of a chain.\n",
+ "\n",
+ "To see how this works, construct a chain such as the one below that performs [retrieval-augmented generation](/docs/tutorials/rag). It answers questions by first searching the web using [Tavily](/docs/integrations/retrievers/tavily), then passing the results to a chat model to generate a final answer:\n",
+ "\n",
+ "```{=mdx}\n",
+ "import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
+ "\n",
+ "\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "// @lc-docs-hide-cell\n",
+ "import { ChatAnthropic } from \"@langchain/anthropic\";\n",
+ "\n",
+ "const llm = new ChatAnthropic({\n",
+ " model: \"claude-3-5-sonnet-20240620\",\n",
+ "});"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import { TavilySearchAPIRetriever } from \"@langchain/community/retrievers/tavily_search_api\";\n",
+ "import type { Document } from \"@langchain/core/documents\";\n",
+ "import { StringOutputParser } from \"@langchain/core/output_parsers\";\n",
+ "import { ChatPromptTemplate } from \"@langchain/core/prompts\";\n",
+ "import { RunnablePassthrough, RunnableSequence } from \"@langchain/core/runnables\";\n",
+ "\n",
+ "const formatDocsAsString = (docs: Document[]) => {\n",
+ " return docs.map((doc) => doc.pageContent).join(\"\\n\\n\")\n",
+ "}\n",
+ "\n",
+ "const retriever = new TavilySearchAPIRetriever({\n",
+ " k: 3,\n",
+ "});\n",
+ "\n",
+ "const prompt = ChatPromptTemplate.fromTemplate(`\n",
+ "Use the following context to answer questions to the best of your ability:\n",
+ "\n",
+ "\n",
+ "{context}\n",
+ "\n",
+ "\n",
+ "Question: {question}`)\n",
+ "\n",
+ "const chain = RunnableSequence.from([\n",
+ " {\n",
+ " context: retriever.pipe(formatDocsAsString),\n",
+ " question: new RunnablePassthrough(),\n",
+ " },\n",
+ " prompt,\n",
+ " llm,\n",
+ " new StringOutputParser(),\n",
+ "]);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "If you invoke it normally, you can see it returns up-to-date information:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Based on the provided context, the current weather in San Francisco is:\n",
+ "\n",
+ "Temperature: 17.6°C (63.7°F)\n",
+ "Condition: Sunny\n",
+ "Wind: 14.4 km/h (8.9 mph) from WSW direction\n",
+ "Humidity: 74%\n",
+ "Cloud cover: 15%\n",
+ "\n",
+ "The information indicates it's a sunny day with mild temperatures and light winds. The data appears to be from August 2, 2024, at 17:00 local time.\n"
+ ]
+ }
+ ],
+ "source": [
+ "await chain.invoke(\"what is the current weather in SF?\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now, let's interrupt it early. Initialize an [`AbortController`](https://developer.mozilla.org/en-US/docs/Web/API/AbortController) and pass its `signal` property into the chain execution. To illustrate the fact that the cancellation occurs as soon as possible, set a timeout of 100ms:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Error: Aborted\n",
+ " at EventTarget. (/Users/jacoblee/langchain/langchainjs/langchain-core/dist/utils/signal.cjs:19:24)\n",
+ " at [nodejs.internal.kHybridDispatch] (node:internal/event_target:825:20)\n",
+ " at EventTarget.dispatchEvent (node:internal/event_target:760:26)\n",
+ " at abortSignal (node:internal/abort_controller:370:10)\n",
+ " at AbortController.abort (node:internal/abort_controller:392:5)\n",
+ " at Timeout._onTimeout (evalmachine.:7:29)\n",
+ " at listOnTimeout (node:internal/timers:573:17)\n",
+ " at process.processTimers (node:internal/timers:514:7)\n",
+ "timer1: 103.204ms\n"
+ ]
+ }
+ ],
+ "source": [
+ "const controller = new AbortController();\n",
+ "\n",
+ "const startTimer = console.time(\"timer1\");\n",
+ "\n",
+ "setTimeout(() => controller.abort(), 100);\n",
+ "\n",
+ "try {\n",
+ " await chain.invoke(\"what is the current weather in SF?\", {\n",
+ " signal: controller.signal,\n",
+ " });\n",
+ "} catch (e) {\n",
+ " console.log(e);\n",
+ "}\n",
+ "\n",
+ "console.timeEnd(\"timer1\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "And you can see that execution ends after just over 100ms. Looking at [this LangSmith trace](https://smith.langchain.com/public/63c04c3b-2683-4b73-a4f7-fb12f5cb9180/r), you can see that the model is never called.\n",
+ "\n",
+ "## Streaming\n",
+ "\n",
+ "You can pass a `signal` when streaming too. This gives you more control over using a `break` statement within the `for await... of` loop to cancel the current run, which will only trigger after final output has already started streaming. The below example uses a `break` statement - note the time elapsed before cancellation occurs:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "chunk \n",
+ "timer2: 3.990s\n"
+ ]
+ }
+ ],
+ "source": [
+ "const startTimer2 = console.time(\"timer2\");\n",
+ "\n",
+ "const stream = await chain.stream(\"what is the current weather in SF?\");\n",
+ "\n",
+ "for await (const chunk of stream) {\n",
+ " console.log(\"chunk\", chunk);\n",
+ " break;\n",
+ "}\n",
+ "\n",
+ "console.timeEnd(\"timer2\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now compare this to using a signal. Note that you will need to wrap the stream in a `try/catch` block:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Error: Aborted\n",
+ " at EventTarget. (/Users/jacoblee/langchain/langchainjs/langchain-core/dist/utils/signal.cjs:19:24)\n",
+ " at [nodejs.internal.kHybridDispatch] (node:internal/event_target:825:20)\n",
+ " at EventTarget.dispatchEvent (node:internal/event_target:760:26)\n",
+ " at abortSignal (node:internal/abort_controller:370:10)\n",
+ " at AbortController.abort (node:internal/abort_controller:392:5)\n",
+ " at Timeout._onTimeout (evalmachine.:7:38)\n",
+ " at listOnTimeout (node:internal/timers:573:17)\n",
+ " at process.processTimers (node:internal/timers:514:7)\n",
+ "timer3: 100.684ms\n"
+ ]
+ }
+ ],
+ "source": [
+ "const controllerForStream = new AbortController();\n",
+ "\n",
+ "const startTimer3 = console.time(\"timer3\");\n",
+ "\n",
+ "setTimeout(() => controllerForStream.abort(), 100);\n",
+ "\n",
+ "try {\n",
+ " const streamWithSignal = await chain.stream(\"what is the current weather in SF?\", {\n",
+ " signal: controllerForStream.signal\n",
+ " });\n",
+ " for await (const chunk of streamWithSignal) {\n",
+ " console.log(chunk);\n",
+ " break;\n",
+ " } \n",
+ "} catch (e) {\n",
+ " console.log(e); \n",
+ "}\n",
+ "\n",
+ "console.timeEnd(\"timer3\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Related\n",
+ "\n",
+ "- [Pass through arguments from one step to the next](/docs/how_to/passthrough)\n",
+ "- [Dispatching custom events](/docs/how_to/callbacks_custom_events)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "TypeScript",
+ "language": "typescript",
+ "name": "tslab"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "mode": "typescript",
+ "name": "javascript",
+ "typescript": true
+ },
+ "file_extension": ".ts",
+ "mimetype": "text/typescript",
+ "name": "typescript",
+ "version": "3.7.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/core_docs/docs/how_to/index.mdx b/docs/core_docs/docs/how_to/index.mdx
index 1282b7d22897..0f4af5b3c37b 100644
--- a/docs/core_docs/docs/how_to/index.mdx
+++ b/docs/core_docs/docs/how_to/index.mdx
@@ -40,6 +40,7 @@ LangChain Expression Language is a way to create arbitrary custom chains. It is
- [How to: add message history](/docs/how_to/message_history)
- [How to: route execution within a chain](/docs/how_to/routing)
- [How to: add fallbacks](/docs/how_to/fallbacks)
+- [How to: cancel execution](/docs/how_to/cancel_execution/)
## Components
diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/pdf.ipynb b/docs/core_docs/docs/integrations/document_loaders/web_loaders/pdf.ipynb
index 812ed2961124..5c05e476bf5b 100644
--- a/docs/core_docs/docs/integrations/document_loaders/web_loaders/pdf.ipynb
+++ b/docs/core_docs/docs/integrations/document_loaders/web_loaders/pdf.ipynb
@@ -68,9 +68,7 @@
"source": [
"## Instantiation\n",
"\n",
- "Now we can instantiate our model object and load documents:\n",
- "\n",
- "- TODO: Update model instantiation with relevant params."
+ "Now we can instantiate our model object and load documents:"
]
},
{
diff --git a/docs/core_docs/docs/integrations/llms/bedrock.ipynb b/docs/core_docs/docs/integrations/llms/bedrock.ipynb
index be1ff211a38e..d052781f33a6 100644
--- a/docs/core_docs/docs/integrations/llms/bedrock.ipynb
+++ b/docs/core_docs/docs/integrations/llms/bedrock.ipynb
@@ -35,10 +35,6 @@
"## Overview\n",
"### Integration details\n",
"\n",
- "- TODO: Fill in table features.\n",
- "- TODO: Remove JS support link if not relevant, otherwise ensure link is correct.\n",
- "- TODO: Make sure API reference links are correct.\n",
- "\n",
"| Class | Package | Local | Serializable | [PY support](https://python.langchain.com/docs/integrations/llms/bedrock) | Package downloads | Package latest |\n",
"| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n",
"| [Bedrock](https://api.js.langchain.com/classes/langchain_community_llms_bedrock.Bedrock.html) | [@langchain/community](https://api.js.langchain.com/modules/langchain_community_llms_bedrock.html) | ❌ | ✅ | ✅ | ![NPM - Downloads](https://img.shields.io/npm/dm/@langchain/community?style=flat-square&label=%20&) | ![NPM - Version](https://img.shields.io/npm/v/@langchain/community?style=flat-square&label=%20&) |\n",
diff --git a/docs/core_docs/scripts/quarto-build.js b/docs/core_docs/scripts/quarto-build.js
index 128111f8d095..1caac722ea03 100644
--- a/docs/core_docs/scripts/quarto-build.js
+++ b/docs/core_docs/scripts/quarto-build.js
@@ -2,7 +2,8 @@ const fs = require("node:fs");
const { glob } = require("glob");
const { execSync } = require("node:child_process");
-const IGNORED_CELL_REGEX = /```\w*?\n\/\/ ?@lc-docs-hide-cell\n[\s\S]*?```/g;
+const IGNORED_CELL_REGEX =
+ /^```\s?\w*?[\s\S]\/\/ ?@lc-docs-hide-cell[\s\S]*?^```/gm;
const LC_TS_IGNORE_REGEX = /\/\/ ?@lc-ts-ignore\n/g;
async function main() {
@@ -20,7 +21,7 @@ async function main() {
fs.writeFileSync(pathToRootGitignore, gitignore);
for (const renamedFilepath of allRenames) {
if (fs.existsSync(renamedFilepath)) {
- let content = fs.readFileSync(renamedFilepath).toString();
+ let content = fs.readFileSync(renamedFilepath, "utf-8").toString();
if (
content.match(IGNORED_CELL_REGEX) ||
content.match(LC_TS_IGNORE_REGEX)