-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add web search (#580) bump:patch
* feat: add web search * feat: update requirements
- Loading branch information
Showing
10 changed files
with
218 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
60 changes: 60 additions & 0 deletions
60
libs/kotaemon/kotaemon/indices/retrievers/jina_web_search.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import requests | ||
from decouple import config | ||
|
||
from kotaemon.base import BaseComponent, RetrievedDocument | ||
|
||
# Jina settings, looked up through decouple's `config()`.
# JINA_API_KEY defaults to "" and WebSearch.run raises ValueError when it is empty.
JINA_API_KEY = config("JINA_API_KEY", default="") | ||
# NOTE(review): JINA_URL is not referenced by the code visible in this module;
# WebSearch.run builds its request against a hard-coded https://s.jina.ai/ URL.
JINA_URL = config("JINA_URL", default="https://r.jina.ai/") | ||
|
||
|
||
class WebSearch(BaseComponent):
    """Web search retriever backed by the Jina search endpoint (s.jina.ai).

    Each search result returned by Jina is converted into one
    `RetrievedDocument` whose text is a small markdown snippet
    (URL, title, description, content).
    """

    def run(
        self,
        text: str,
        *args,
        **kwargs,
    ) -> list[RetrievedDocument]:
        """Search the web for `text` and return the results as documents.

        Args:
            text: free-form search query.
            *args: accepted for interface compatibility; unused.
            **kwargs: accepted for interface compatibility; unused.

        Returns:
            One `RetrievedDocument` per item in the response's `data` list
            (an empty list when the response carries no results).

        Raises:
            ValueError: if `JINA_API_KEY` is not configured.
            requests.HTTPError: if the API answers with an error status.
            requests.Timeout: if the API does not answer in time.
        """
        # Imported locally so the module's import block stays untouched.
        from urllib.parse import quote

        if not JINA_API_KEY:
            raise ValueError(
                "This feature requires JINA_API_KEY "
                "(get free one from https://jina.ai/reader)"
            )

        # The query is embedded in the URL *path*, so it has to be
        # percent-encoded; otherwise characters such as "?", "#" or "/"
        # would be interpreted as URL syntax and truncate/alter the query.
        encoded_query = quote(text, safe="")
        api_url = f"https://s.jina.ai/{encoded_query}"
        headers = {
            "X-With-Generated-Alt": "true",
            "Accept": "application/json",
            # The key is guaranteed to be non-empty by the guard above.
            "Authorization": f"Bearer {JINA_API_KEY}",
        }

        # Without a timeout `requests` waits forever on a stalled connection.
        response = requests.get(api_url, headers=headers, timeout=60)
        response.raise_for_status()
        response_dict = response.json()

        return [
            RetrievedDocument(
                text=(
                    "###URL: [{url}]({url})\n\n"
                    "####{title}\n\n"
                    "{description}\n"
                    "{content}"
                ).format(
                    # Tolerate results that omit optional fields instead of
                    # failing the whole search with a KeyError.
                    url=item.get("url", ""),
                    title=item.get("title", ""),
                    description=item.get("description", ""),
                    content=item.get("content", ""),
                ),
                metadata={
                    "file_name": "Web search",
                    "type": "table",
                    "llm_trulens_score": 1.0,
                },
            )
            for item in response_dict.get("data") or []
        ]

    def generate_relevant_scores(self, text, documents: list[RetrievedDocument]):
        """Return `documents` unchanged; no re-scoring is performed here."""
        return documents
57 changes: 57 additions & 0 deletions
57
libs/kotaemon/kotaemon/indices/retrievers/tavily_web_search.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
from decouple import config | ||
|
||
from kotaemon.base import BaseComponent, RetrievedDocument | ||
|
||
# Tavily API key, looked up through decouple's `config()`.
# Defaults to "" and WebSearch.run raises ValueError when it is empty.
TAVILY_API_KEY = config("TAVILY_API_KEY", default="") | ||
|
||
|
||
class WebSearch(BaseComponent):
    """Web search retriever backed by the Tavily API.

    All search results are merged into a single `RetrievedDocument` whose
    text lists each result's URL followed by its content.
    """

    def run(
        self,
        text: str,
        *args,
        **kwargs,
    ) -> list[RetrievedDocument]:
        """Search the web for `text` and return the merged results.

        Args:
            text: free-form search query.
            *args: accepted for interface compatibility; unused.
            **kwargs: accepted for interface compatibility; unused.

        Returns:
            A one-element list holding a document with all results joined
            by blank lines.

        Raises:
            ValueError: if `TAVILY_API_KEY` is not configured.
            ImportError: if the `tavily-python` package is not installed.
        """
        if not TAVILY_API_KEY:
            raise ValueError(
                "This feature requires TAVILY_API_KEY "
                "(get free one from https://app.tavily.com/)"
            )

        # Optional dependency: imported lazily so the module can be loaded
        # without tavily-python installed.
        try:
            from tavily import TavilyClient
        except ImportError as exc:
            # Chain the original error so the real import failure stays
            # visible in the traceback.
            raise ImportError(
                "Please install `pip install tavily-python` to use this feature"
            ) from exc

        tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
        results = tavily_client.search(
            query=text,
            search_depth="advanced",
        )["results"]
        context = "\n\n".join(
            "###URL: [{url}]({url})\n\n{content}".format(
                url=result["url"],
                content=result["content"],
            )
            for result in results
        )

        return [
            RetrievedDocument(
                text=context,
                metadata={
                    "file_name": "Web search",
                    "type": "table",
                    "llm_trulens_score": 1.0,
                },
            )
        ]

    def generate_relevant_scores(self, text, documents: list[RetrievedDocument]):
        """Return `documents` unchanged; no re-scoring is performed here."""
        return documents
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# NOTE(review): presumably the command keyword that selects web search;
# its consumers are not visible here — confirm against callers.
WEB_SEARCH_COMMAND = "web"
Oops, something went wrong.