Skip to content

Commit

Permalink
Websearch Improvements (#1184)
Browse files Browse the repository at this point in the history
* only search if starts with http

* lint

* Clean up

* Add saved browsed links

* Add endpoints, fix lint

* Fix ref for get_browsed_links

* Use a list of urls for browsed_links

* Code clean up

* Clean up duplicate code

* Increase relevance score

* Skip memory injection for collection numbers 0-3 to avoid duplicate injection

* Less memory initializations

* Initialize classes at init

* use website reader

* Move all refs to get_web_content to log browsed links

* Reduce memory initializations

* Don't browse links on websearch prompt

* Add delete memories by external source

* Improve delete browsed links

* Clean up docs

* Use append to add context

* Use append on context

* lint
  • Loading branch information
Josh-XT authored May 21, 2024
1 parent 0d468a6 commit f8b580d
Show file tree
Hide file tree
Showing 12 changed files with 513 additions and 304 deletions.
9 changes: 9 additions & 0 deletions agixt/DBConnection.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ class AgentProvider(Base):
settings = relationship("AgentProviderSetting", backref="agent_provider")


class AgentBrowsedLink(Base):
    # ORM model for one row of the "agent_browsed_link" table: a single link
    # associated with an agent.
    __tablename__ = "agent_browsed_link"

    # Primary key; a UUID generated on the Python side via uuid.uuid4.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    # Required foreign key to agent.id.
    agent_id = Column(
        UUID(as_uuid=True),
        ForeignKey("agent.id"),
        nullable=False,
    )
    # The link itself; required.
    link = Column(Text, nullable=False)
    # Filled in by the database server with now() when no value is supplied.
    timestamp = Column(DateTime, server_default=text("now()"))


class Agent(Base):
__tablename__ = "agent"
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
Expand All @@ -104,6 +112,7 @@ class Agent(Base):
)
user_id = Column(UUID(as_uuid=True), ForeignKey("user.id"), nullable=True)
settings = relationship("AgentSetting", backref="agent") # One-to-many relationship
browsed_links = relationship("AgentBrowsedLink", backref="agent")
user = relationship("User", backref="agent")


Expand Down
177 changes: 60 additions & 117 deletions agixt/Interactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,7 @@
import base64
import uuid
from datetime import datetime
from readers.website import WebsiteReader
from readers.file import FileReader
from readers.youtube import YoutubeReader
from Websearch import Websearch
from Extensions import Extensions
from ApiClient import (
Expand Down Expand Up @@ -39,49 +37,46 @@ class Interactions:
def __init__(
self,
agent_name: str = "",
collection_number: int = 0,
user=DEFAULT_USER,
ApiClient=None,
):
self.ApiClient = ApiClient
self.user = user
if agent_name != "":
self.agent_name = agent_name
self.agent = Agent(self.agent_name, user=user, ApiClient=ApiClient)
self.agent = Agent(self.agent_name, user=user, ApiClient=self.ApiClient)
self.agent_commands = self.agent.get_commands_string()
self.websearch = Websearch(
collection_number=1,
agent=self.agent,
user=self.user,
ApiClient=self.ApiClient,
)
self.agent_memory = self.websearch.agent_memory
self.positive_feedback_memories = FileReader(
agent_name=self.agent_name,
agent_config=self.agent.AGENT_CONFIG,
collection_number=2,
ApiClient=self.ApiClient,
user=self.user,
)
self.negative_feedback_memories = FileReader(
agent_name=self.agent_name,
agent_config=self.agent.AGENT_CONFIG,
ApiClient=ApiClient,
searxng_instance_url=(
self.agent.AGENT_CONFIG["settings"]["SEARXNG_INSTANCE_URL"]
if "SEARXNG_INSTANCE_URL" in self.agent.AGENT_CONFIG["settings"]
else ""
),
collection_number=3,
ApiClient=self.ApiClient,
user=self.user,
)
else:
self.agent_name = ""
self.agent = None
self.agent_commands = ""
self.agent_memory = WebsiteReader(
agent_name=self.agent_name,
agent_config=self.agent.AGENT_CONFIG,
collection_number=int(collection_number),
ApiClient=ApiClient,
user=user,
)
self.yt = YoutubeReader(
agent_name=self.agent_name,
agent_config=self.agent.AGENT_CONFIG,
collection_number=1,
ApiClient=ApiClient,
user=user,
)
self.stop_running_event = None
self.browsed_links = []
self.websearch = None
self.agent_memory = None
self.response = ""
self.failures = 0
self.user = user
self.chain = Chain(user=user)
self.cp = Prompts(user=user)
self.ApiClient = ApiClient

def custom_format(self, string, **kwargs):
if isinstance(string, list):
Expand Down Expand Up @@ -132,83 +127,73 @@ async def format_prompt(
context = []
else:
if user_input:
min_relevance_score = 0.0
min_relevance_score = 0.3
if "min_relevance_score" in kwargs:
try:
min_relevance_score = float(kwargs["min_relevance_score"])
except:
min_relevance_score = 0.0
min_relevance_score = 0.3
context = await self.agent_memory.get_memories(
user_input=user_input,
limit=top_results,
min_relevance_score=min_relevance_score,
)
positive_feedback = await WebsiteReader(
agent_name=self.agent_name,
agent_config=self.agent.AGENT_CONFIG,
collection_number=2,
ApiClient=self.ApiClient,
user=self.user,
).get_memories(
positive_feedback = self.positive_feedback_memories.get_memories(
user_input=user_input,
limit=3,
min_relevance_score=0.7,
)
negative_feedback = await WebsiteReader(
agent_name=self.agent_name,
agent_config=self.agent.AGENT_CONFIG,
collection_number=3,
ApiClient=self.ApiClient,
user=self.user,
).get_memories(
negative_feedback = self.negative_feedback_memories.get_memories(
user_input=user_input,
limit=3,
min_relevance_score=0.7,
)
if positive_feedback or negative_feedback:
context += f"The users input makes you to remember some feedback from previous interactions:\n"
context.append(
f"The users input makes you to remember some feedback from previous interactions:\n"
)
if positive_feedback:
context += f"Positive Feedback:\n{positive_feedback}\n"
context.append(f"Positive Feedback:\n{positive_feedback}\n")
if negative_feedback:
context += f"Negative Feedback:\n{negative_feedback}\n"
context.append(f"Negative Feedback:\n{negative_feedback}\n")
if websearch:
context += await WebsiteReader(
agent_name=self.agent_name,
agent_config=self.agent.AGENT_CONFIG,
collection_number=1,
ApiClient=self.ApiClient,
user=self.user,
).get_memories(
user_input=user_input,
limit=top_results,
min_relevance_score=min_relevance_score,
)
if "inject_memories_from_collection_number" in kwargs:
if int(kwargs["inject_memories_from_collection_number"]) > 0:
context += await WebsiteReader(
agent_name=self.agent_name,
agent_config=self.agent.AGENT_CONFIG,
collection_number=int(
kwargs["inject_memories_from_collection_number"]
),
ApiClient=self.ApiClient,
user=self.user,
).get_memories(
context.append(
await self.websearch.agent_memory.get_memories(
user_input=user_input,
limit=top_results,
min_relevance_score=min_relevance_score,
)
)
if "inject_memories_from_collection_number" in kwargs:
if int(kwargs["inject_memories_from_collection_number"]) > 3:
context.append(
await FileReader(
agent_name=self.agent_name,
agent_config=self.agent.AGENT_CONFIG,
collection_number=int(
kwargs["inject_memories_from_collection_number"]
),
ApiClient=self.ApiClient,
user=self.user,
).get_memories(
user_input=user_input,
limit=top_results,
min_relevance_score=min_relevance_score,
)
)
else:
context = []
if "context" in kwargs:
context += [kwargs["context"]]
context.append([kwargs["context"]])
if vision_response != "":
context.append(
f"{self.agent_name}'s visual description from viewing uploaded images by user in this interaction:\n{vision_response}\n"
)
if context != [] and context != "":
context = "\n".join(context)
context = f"The user's input causes you remember these things:\n{context}\n"
else:
context = ""
if vision_response != "":
context += f"{self.agent_name}'s visual description from viewing uploaded images by user in this interaction:\n{vision_response}\n"
if chain_name != "":
try:
for arg, value in kwargs.items():
Expand Down Expand Up @@ -469,51 +454,9 @@ async def run(
else:
websearch_timeout = 0
if browse_links != False:
links = re.findall(r"(?P<url>https?://[^\s]+)", user_input)
if links is not None and len(links) > 0:
for link in links:
if (
link not in self.websearch.browsed_links
and link != ""
and link != None
and link != "None"
):
logging.info(f"Browsing link: {link}")
self.websearch.browsed_links.append(link)
if str(link).startswith("https://www.youtube.com/watch?v="):
video_id = link.split("watch?v=")[1]
await self.yt.write_youtube_captions_to_memory(
video_id=video_id
)
link_list = None
else:
(
text_content,
link_list,
) = await self.agent_memory.write_website_to_memory(
url=link
)
if int(websearch_depth) > 0:
if link_list is not None and len(link_list) > 0:
i = 0
for sublink in link_list:
if sublink[1]:
if (
sublink[1]
not in self.websearch.browsed_links
and sublink[1] != ""
and sublink[1] != None
and sublink[1] != "None"
):
logging.info(f"Browsing link: {sublink[1]}")
if i <= websearch_depth:
(
text_content,
link_list,
) = await self.agent_memory.write_website_to_memory(
url=sublink[1]
)
i = i + 1
await self.websearch.browse_links_in_input(
user_input=user_input, search_depth=websearch_depth
)
if websearch:
if user_input == "":
if "primary_objective" in kwargs and "task" in kwargs:
Expand Down
27 changes: 26 additions & 1 deletion agixt/Memories.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,29 @@ async def get_memories(
response.append(metadata)
return response

def delete_memories_from_external_source(self, external_source: str):
    """Delete every record in this collection that came from ``external_source``.

    Args:
        external_source: Value matched against each record's
            ``external_source_name`` metadata field.

    Returns:
        bool: ``True`` when at least one record matched and was deleted;
        ``False`` when the collection is unavailable or nothing matched.
    """
    collection = self.chroma_client.get_collection(name=self.collection_name)
    if not collection:
        return False
    # A metadata lookup is a filtered ``get``, not a similarity ``query``:
    # ``query`` requires embeddings/texts and has no ``query_metadatas``
    # argument. Record ids come back under the top-level "ids" key rather
    # than inside each metadata dict.
    matches = collection.get(
        where={"external_source_name": external_source},
        include=["metadatas"],
    )
    ids = matches["ids"] or []
    if not ids:
        return False
    collection.delete(ids=ids)
    return True

def get_external_data_sources(self):
    """List the distinct external sources recorded in this collection.

    Returns:
        list: Unique, non-empty ``external_source_name`` metadata values,
        sorted for a stable order. Empty when the collection is unavailable
        or no record carries that field.
    """
    collection = self.chroma_client.get_collection(name=self.collection_name)
    if not collection:
        return []
    # ``get`` returns stored records without needing a query embedding;
    # "metadatas" is a flat list with one dict (or None) per record.
    records = collection.get(include=["metadatas"])
    sources = {
        metadata["external_source_name"]
        for metadata in (records["metadatas"] or [])
        if metadata and metadata.get("external_source_name")
    }
    # key=str keeps sorting safe should non-string metadata values appear.
    return sorted(sources, key=str)

def score_chunk(self, chunk: str, keywords: set) -> int:
"""Score a chunk based on the number of query keywords it contains."""
chunk_counter = Counter(chunk.split())
Expand Down Expand Up @@ -492,7 +515,9 @@ async def get_context(
min_relevance_score=0.7,
)
if positive_feedback or negative_feedback:
context += f"The users input makes you to remember some feedback from previous interactions:\n"
context.append(
f"The users input makes you to remember some feedback from previous interactions:\n"
)
if positive_feedback:
context += f"Positive Feedback:\n{positive_feedback}\n"
if negative_feedback:
Expand Down
10 changes: 10 additions & 0 deletions agixt/Models.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,11 @@ class AgentConfig(BaseModel):
commands: Dict[str, Any]


class AgentBrowsedLinks(BaseModel):
    # Name of the agent the links belong to.
    agent_name: str
    # One dict per link; the keys are not fixed by this model.
    links: List[Dict[str, Any]]


class AgentCommands(BaseModel):
agent_name: str
commands: Dict[str, Any]
Expand All @@ -205,6 +210,11 @@ class HistoryModel(BaseModel):
page: int = 1


class ExternalSource(BaseModel):
    # Identifier of the external source.
    external_source: str
    # Memory collection number; defaults to 0 when omitted.
    collection_number: int = 0


class ConversationHistoryModel(BaseModel):
agent_name: str
conversation_name: str
Expand Down
Loading

0 comments on commit f8b580d

Please sign in to comment.