Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds Tavily Search API retriever #11314

Merged
merged 2 commits into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions docs/extras/integrations/retrievers/tavily.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tavily Search API\n",
"\n",
"[Tavily's Search API](https://tavily.com) is a search engine built specifically for AI agents (LLMs), delivering real-time, accurate, and factual results at speed.\n",
"\n",
"## Usage\n",
"\n",
"For a full list of allowed arguments, see [the official documentation](https://app.tavily.com/documentation/python). You can also pass any param to the SDK via a `kwargs` dictionary."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# %pip install tavily-python"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Nintendo Designer (s) Hidemaro Fujibayashi (director) Eiji Aonuma (producer/group manager) Release date (s) United States of America: • March 3, 2017 Japan: • March 3, 2017 Australia / New Zealand: • March 2, 2017 Belgium: • March 3, 2017 Hong Kong: • Feburary 1, 2018 South Korea: • February 1, 2018 The UK / Ireland: • March 3, 2017 Content ratings', metadata={'title': 'The Legend of Zelda: Breath of the Wild - Zelda Wiki', 'source': 'https://zelda.fandom.com/wiki/The_Legend_of_Zelda:_Breath_of_the_Wild', 'score': 0.96994, 'images': None}),\n",
" Document(page_content='02/01/23 Nintendo Switch Online member exclusive: Save on two digital games Read more 09/13/22 Out of the Shadows … the Legend of Zelda: Tears of the Kingdom Launches for Nintendo Switch on May...', metadata={'title': 'The Legend of Zelda™: Breath of the Wild - Nintendo', 'source': 'https://www.nintendo.com/store/products/the-legend-of-zelda-breath-of-the-wild-switch/', 'score': 0.94346, 'images': None}),\n",
" Document(page_content='Now we finally have a concrete release date of May 12, 2023. The date was announced alongside this brief (and mysterious) new trailer that also confirmed its title: The Legend of Zelda: Tears...', metadata={'title': 'The Legend of Zelda: Tears of the Kingdom: Release Date, Gameplay ... - IGN', 'source': 'https://www.ign.com/articles/the-legend-of-zelda-breath-of-the-wild-2-release-date-gameplay-news-rumors', 'score': 0.94145, 'images': None}),\n",
" Document(page_content='It was eventually released on March 3, 2017, as a launch game for the Switch and the final Nintendo game for the Wii U. It received widespread acclaim and won numerous Game of the Year accolades. Critics praised its open-ended gameplay, open-world design, and attention to detail, though some criticized its technical performance.', metadata={'title': 'The Legend of Zelda: Breath of the Wild - Wikipedia', 'source': 'https://en.wikipedia.org/wiki/The_Legend_of_Zelda:_Breath_of_the_Wild', 'score': 0.92102, 'images': None})]"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"from langchain.retrievers.tavily_search_api import TavilySearchAPIRetriever\n",
"\n",
"os.environ[\"TAVILY_API_KEY\"] = \"YOUR_API_KEY\"\n",
"\n",
"retriever = TavilySearchAPIRetriever(k=4)\n",
"\n",
"retriever.invoke(\"what year was breath of the wild released?\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 2 additions & 0 deletions libs/langchain/langchain/retrievers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
from langchain.retrievers.remote_retriever import RemoteLangChainRetriever
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.retrievers.svm import SVMRetriever
from langchain.retrievers.tavily_search_api import TavilySearchAPIRetriever
from langchain.retrievers.tfidf import TFIDFRetriever
from langchain.retrievers.time_weighted_retriever import (
TimeWeightedVectorStoreRetriever,
Expand Down Expand Up @@ -82,6 +83,7 @@
"RemoteLangChainRetriever",
"SVMRetriever",
"SelfQueryRetriever",
"TavilySearchAPIRetriever",
"TFIDFRetriever",
"BM25Retriever",
"TimeWeightedVectorStoreRetriever",
Expand Down
82 changes: 82 additions & 0 deletions libs/langchain/langchain/retrievers/tavily_search_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import os
from enum import Enum
from typing import Any, Dict, List, Optional

from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.schema import Document
from langchain.schema.retriever import BaseRetriever


class SearchDepth(Enum):
    """Search depth options accepted by the Tavily retriever."""

    # The member values are the literal strings forwarded to the Tavily
    # client as its `search_depth` argument.
    BASIC = "basic"
    ADVANCED = "advanced"


class TavilySearchAPIRetriever(BaseRetriever):
    """Tavily Search API retriever.

    Runs a query through the ``tavily`` client and converts every search
    result into a ``Document``. When ``include_generated_answer`` is set, the
    answer returned by the API is prepended as an extra document.
    """

    # Total number of documents to aim for; sent to the API as `max_results`
    # (minus one when a generated answer takes the first slot).
    k: int = 10
    # Ask the API for an answer and return it as the first document.
    include_generated_answer: bool = False
    # Use each result's "raw_content" instead of "content" as page content.
    include_raw_content: bool = False
    # Forwarded as `include_images`; the response's "images" value is attached
    # to every result document's metadata.
    include_images: bool = False
    # Forwarded (as its string value) as `search_depth`.
    search_depth: SearchDepth = SearchDepth.BASIC
    # Forwarded unchanged as `include_domains` / `exclude_domains`.
    include_domains: Optional[List[str]] = None
    exclude_domains: Optional[List[str]] = None
    # Extra keyword arguments spread into the client's `search` call.
    kwargs: Optional[Dict[str, Any]] = {}
    # API key; when unset, the TAVILY_API_KEY environment variable is used.
    api_key: Optional[str] = None

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Search Tavily for ``query`` and return the results as documents.

        Args:
            query: The search query sent to the API.
            run_manager: Callback manager for this retriever run (not used
                by this implementation).

        Returns:
            One ``Document`` per search result, preceded by a
            "Suggested Answer" document when ``include_generated_answer``
            is enabled.

        Raises:
            ValueError: If the ``tavily`` package is not installed.
            KeyError: If ``api_key`` is unset and ``TAVILY_API_KEY`` is not
                present in the environment.
        """
        try:
            from tavily import Client
        except ImportError as err:
            raise ValueError(
                "Tavily python package not found. "
                "Please install it with `pip install tavily-python`."
            ) from err

        tavily = Client(api_key=self.api_key or os.environ["TAVILY_API_KEY"])

        # Reserve one slot for the generated answer so the total stays at k,
        # but never request a negative number of results.
        if self.include_generated_answer:
            max_results = max(self.k - 1, 0)
        else:
            max_results = self.k

        # `kwargs` is Optional, so guard against an explicit None.
        extra_params = self.kwargs or {}
        response = tavily.search(
            query=query,
            max_results=max_results,
            search_depth=self.search_depth.value,
            include_answer=self.include_generated_answer,
            include_domains=self.include_domains,
            exclude_domains=self.exclude_domains,
            include_raw_content=self.include_raw_content,
            include_images=self.include_images,
            **extra_params,
        )

        content_key = "raw_content" if self.include_raw_content else "content"
        # Keys that are mapped explicitly and must not be copied into metadata.
        mapped_keys = ("content", "title", "url", "raw_content")
        images = response.get("images")

        docs: List[Document] = []
        # A missing or null "results" entry yields no result documents.
        for result in response.get("results") or []:
            extras = {
                key: value
                for key, value in result.items()
                if key not in mapped_keys
            }
            docs.append(
                Document(
                    # The field may be present but null; fall back to "".
                    page_content=result.get(content_key) or "",
                    metadata={
                        "title": result.get("title", ""),
                        "source": result.get("url", ""),
                        **extras,
                        "images": images,
                    },
                )
            )

        if self.include_generated_answer:
            answer_doc = Document(
                page_content=response.get("answer") or "",
                metadata={
                    "title": "Suggested Answer",
                    "source": "https://tavily.com/",
                },
            )
            docs = [answer_doc, *docs]

        return docs