Remove dependency on RetrieveAssistantAgent for RetrieveChat (#3320)

* Remove deps on RetrieveAssistantAgent for getting human input
* Terminate when no more context
* Add deprecation warning message
* Clean up RetrieveAssistantAgent, part 1
* Update version
* Clean up docs and notebooks
microsoft · Aug 28, 2024 · e6247c7 · e6247c7
1 parent 0f5dda2
commit e6247c7
Show file tree

Hide file tree

Showing 17 changed files with 536 additions and 448 deletions.
diff --git a/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py b/autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py
@@ -1,3 +1,4 @@
+import warnings
 from typing import Callable, Dict, List, Literal, Optional
 
 from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent
@@ -93,6 +94,11 @@ def __init__(
              **kwargs (dict): other kwargs in [UserProxyAgent](../user_proxy_agent#__init__).
 
         """
+        warnings.warn(
+            "The QdrantRetrieveUserProxyAgent is deprecated. Please use the RetrieveUserProxyAgent instead, set `vector_db` to `qdrant`.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         super().__init__(name, human_input_mode, is_termination_msg, retrieve_config, **kwargs)
         self._client = self._retrieve_config.get("client", QdrantClient(":memory:"))
         self._embedding_model = self._retrieve_config.get("embedding_model", "BAAI/bge-small-en-v1.5")

diff --git a/autogen/agentchat/contrib/retrieve_assistant_agent.py b/autogen/agentchat/contrib/retrieve_assistant_agent.py
@@ -1,3 +1,4 @@
+import warnings
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 from autogen.agentchat.agent import Agent
@@ -16,6 +17,11 @@ class RetrieveAssistantAgent(AssistantAgent):
     """
 
     def __init__(self, *args, **kwargs):
+        warnings.warn(
+            "The RetrieveAssistantAgent is deprecated. Please use the AssistantAgent instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         super().__init__(*args, **kwargs)
         self.register_reply(Agent, RetrieveAssistantAgent._generate_retrieve_assistant_reply)
 

diff --git a/autogen/agentchat/contrib/retrieve_user_proxy_agent.py b/autogen/agentchat/contrib/retrieve_user_proxy_agent.py
@@ -189,7 +189,7 @@ def __init__(
                     interactive retrieval. Default is True.
                 - `collection_name` (Optional, str) - the name of the collection.
                     If key not provided, a default name `autogen-docs` will be used.
-                - `get_or_create` (Optional, bool) - Whether to get the collection if it exists. Default is True.
+                - `get_or_create` (Optional, bool) - Whether to get the collection if it exists. Default is False.
                 - `overwrite` (Optional, bool) - Whether to overwrite the collection if it exists. Default is False.
                     Case 1. if the collection does not exist, create the collection.
                     Case 2. the collection exists, if overwrite is True, it will overwrite the collection.
@@ -306,6 +306,10 @@ def retrieve_docs(self, problem: str, n_results: int = 20, search_string: str =
                 self._db_config["embedding_function"] = self._embedding_function
             self._vector_db = VectorDBFactory.create_vector_db(db_type=self._vector_db, **self._db_config)
         self.register_reply(Agent, RetrieveUserProxyAgent._generate_retrieve_user_reply, position=2)
+        self.register_hook(
+            hookable_method="process_message_before_send",
+            hook=self._check_update_context_before_send,
+        )
 
     def _init_db(self):
         if not self._vector_db:
@@ -400,6 +404,34 @@ def _is_termination_msg_retrievechat(self, message):
         update_context_case1, update_context_case2 = self._check_update_context(message)
         return not (contain_code or update_context_case1 or update_context_case2)
 
+    def _check_update_context_before_send(self, sender, message, recipient, silent):
+        if not isinstance(message, (str, dict)):
+            return message
+        elif isinstance(message, dict):
+            msg_text = message.get("content", message)
+        else:
+            msg_text = message
+
+        if "UPDATE CONTEXT" == msg_text.strip().upper():
+            doc_contents = self._get_context(self._results)
+
+            # Always use self.problem as the query text to retrieve docs, but each time we replace the context with the
+            # next similar docs in the retrieved doc results.
+            if not doc_contents:
+                for _tmp_retrieve_count in range(1, 5):
+                    self._reset(intermediate=True)
+                    self.retrieve_docs(
+                        self.problem, self.n_results * (2 * _tmp_retrieve_count + 1), self._search_string
+                    )
+                    doc_contents = self._get_context(self._results)
+                    if doc_contents or self.n_results * (2 * _tmp_retrieve_count + 1) >= len(self._results[0]):
+                        break
+            msg_text = self._generate_message(doc_contents, task=self._task)
+
+        if isinstance(message, dict):
+            message["content"] = msg_text
+        return message
+
     @staticmethod
     def get_max_tokens(model="gpt-3.5-turbo"):
         if "32k" in model:

diff --git a/autogen/version.py b/autogen/version.py
@@ -1 +1 @@
-__version__ = "0.2.34"
+__version__ = "0.2.35"
diff --git a/notebook/agentchat_RetrieveChat.ipynb b/notebook/agentchat_RetrieveChat.ipynb
diff --git a/notebook/agentchat_RetrieveChat_mongodb.ipynb b/notebook/agentchat_RetrieveChat_mongodb.ipynb
@@ -10,7 +10,7 @@
     "AutoGen offers conversable agents powered by LLM, tool or human, which can be used to perform tasks collectively via automated chat. This framework allows tool use and human participation through multi-agent conversation.\n",
     "Please find documentation about this feature [here](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat).\n",
     "\n",
-    "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). Essentially, `RetrieveAssistantAgent` and  `RetrieveUserProxyAgent` implement a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n",
+    "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `AssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). Essentially, `RetrieveUserProxyAgent` implement a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n",
     "\n",
     "## Table of Contents\n",
     "We'll demonstrate six examples of using RetrieveChat for code generation and question answering:\n",
@@ -58,7 +58,7 @@
     "import os\n",
     "\n",
     "import autogen\n",
-    "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n",
+    "from autogen import AssistantAgent\n",
     "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n",
     "\n",
     "# Accepted file formats for that can be stored in\n",
@@ -83,7 +83,7 @@
     "\n",
     "## Construct agents for RetrieveChat\n",
     "\n",
-    "We start by initializing the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for RetrieveAssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant."
+    "We start by initializing the `AssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for AssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant."
    ]
   },
   {
@@ -111,8 +111,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n",
-    "assistant = RetrieveAssistantAgent(\n",
+    "# 1. create an AssistantAgent instance named \"assistant\"\n",
+    "assistant = AssistantAgent(\n",
     "    name=\"assistant\",\n",
     "    system_message=\"You are a helpful assistant.\",\n",
     "    llm_config={\n",
@@ -123,16 +123,9 @@
     ")\n",
     "\n",
     "# 2. create the RetrieveUserProxyAgent instance named \"ragproxyagent\"\n",
-    "# By default, the human_input_mode is \"ALWAYS\", which means the agent will ask for human input at every step. We set it to \"NEVER\" here.\n",
-    "# `docs_path` is the path to the docs directory. It can also be the path to a single file, or the url to a single file. By default,\n",
-    "# it is set to None, which works only if the collection is already created.\n",
-    "# `task` indicates the kind of task we're working on. In this example, it's a `code` task.\n",
-    "# `chunk_token_size` is the chunk token size for the retrieve chat. By default, it is set to `max_tokens * 0.6`, here we set it to 2000.\n",
-    "# `custom_text_types` is a list of file types to be processed. Default is `autogen.retrieve_utils.TEXT_FORMATS`.\n",
-    "# This only applies to files under the directories in `docs_path`. Explicitly included files and urls will be chunked regardless of their types.\n",
-    "# In this example, we set it to [\"non-existent-type\"] to only process markdown files. Since no \"non-existent-type\" files are included in the `websit/docs`,\n",
-    "# no files there will be processed. However, the explicitly included urls will still be processed.\n",
-    "# **NOTE** Upon the first time adding in the documents, initial query may be slower due to index creation and document indexing time\n",
+    "# Refer to https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/retrieve_user_proxy_agent\n",
+    "# and https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/vectordb/mongodb\n",
+    "# for more information on the RetrieveUserProxyAgent and MongoDBAtlasVectorDB\n",
     "ragproxyagent = RetrieveUserProxyAgent(\n",
     "    name=\"ragproxyagent\",\n",
     "    human_input_mode=\"NEVER\",\n",
@@ -142,9 +135,7 @@
     "        \"docs_path\": [\n",
     "            \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Examples/Integrate%20-%20Spark.md\",\n",
     "            \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Research.md\",\n",
-    "            os.path.join(os.path.abspath(\"\"), \"..\", \"website\", \"docs\"),\n",
     "        ],\n",
-    "        \"custom_text_types\": [\"non-existent-type\"],\n",
     "        \"chunk_token_size\": 2000,\n",
     "        \"model\": config_list[0][\"model\"],\n",
     "        \"vector_db\": \"mongodb\",  # MongoDB Atlas database\n",

diff --git a/notebook/agentchat_RetrieveChat_pgvector.ipynb b/notebook/agentchat_RetrieveChat_pgvector.ipynb
@@ -10,7 +10,7 @@
     "AutoGen offers conversable agents powered by LLM, tool or human, which can be used to perform tasks collectively via automated chat. This framework allows tool use and human participation through multi-agent conversation.\n",
     "Please find documentation about this feature [here](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat).\n",
     "\n",
-    "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). Essentially, `RetrieveAssistantAgent` and  `RetrieveUserProxyAgent` implement a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n",
+    "RetrieveChat is a conversational system for retrieval-augmented code generation and question answering. In this notebook, we demonstrate how to utilize RetrieveChat to generate code and answer questions based on customized documentations that are not present in the LLM's training dataset. RetrieveChat uses the `AssistantAgent` and `RetrieveUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (e.g., [Automated Task Solving with Code Generation, Execution & Debugging](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_auto_feedback_from_code_execution.ipynb)). Essentially, `RetrieveUserProxyAgent` implement a different auto-reply mechanism corresponding to the RetrieveChat prompts.\n",
     "\n",
     "## Table of Contents\n",
     "We'll demonstrate six examples of using RetrieveChat for code generation and question answering:\n",
@@ -92,29 +92,13 @@
     "from sentence_transformers import SentenceTransformer\n",
     "\n",
     "import autogen\n",
-    "from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent\n",
+    "from autogen import AssistantAgent\n",
     "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n",
     "\n",
     "# Accepted file formats for that can be stored in\n",
     "# a vector database instance\n",
     "from autogen.retrieve_utils import TEXT_FORMATS\n",
     "\n",
-    "config_list = [\n",
-    "    {\n",
-    "        \"model\": \"Meta-Llama-3-8B-Instruct-imatrix\",\n",
-    "        \"api_key\": \"YOUR_API_KEY\",\n",
-    "        \"base_url\": \"http://localhost:8080/v1\",\n",
-    "        \"api_type\": \"openai\",\n",
-    "    },\n",
-    "    {\"model\": \"gpt-3.5-turbo-0125\", \"api_key\": \"YOUR_API_KEY\", \"api_type\": \"openai\"},\n",
-    "    {\n",
-    "        \"model\": \"gpt-35-turbo\",\n",
-    "        \"base_url\": \"...\",\n",
-    "        \"api_type\": \"azure\",\n",
-    "        \"api_version\": \"2023-07-01-preview\",\n",
-    "        \"api_key\": \"...\",\n",
-    "    },\n",
-    "]\n",
     "config_list = autogen.config_list_from_json(\n",
     "    \"OAI_CONFIG_LIST\",\n",
     "    file_location=\".\",\n",
@@ -136,7 +120,7 @@
     "\n",
     "## Construct agents for RetrieveChat\n",
     "\n",
-    "We start by initializing the `RetrieveAssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for RetrieveAssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant."
+    "We start by initializing the `AssistantAgent` and `RetrieveUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for AssistantAgent. The detailed instructions are given in the user message. Later we will use the `RetrieveUserProxyAgent.message_generator` to combine the instructions and a retrieval augmented generation task for an initial prompt to be sent to the LLM assistant."
    ]
   },
   {
@@ -173,8 +157,8 @@
     }
    ],
    "source": [
-    "# 1. create an RetrieveAssistantAgent instance named \"assistant\"\n",
-    "assistant = RetrieveAssistantAgent(\n",
+    "# 1. create an AssistantAgent instance named \"assistant\"\n",
+    "assistant = AssistantAgent(\n",
     "    name=\"assistant\",\n",
     "    system_message=\"You are a helpful assistant. You must always reply with some form of text.\",\n",
     "    llm_config={\n",
@@ -191,15 +175,9 @@
     "sentence_transformer_ef = SentenceTransformer(\"all-distilroberta-v1\").encode\n",
     "\n",
     "# 2. create the RetrieveUserProxyAgent instance named \"ragproxyagent\"\n",
-    "# By default, the human_input_mode is \"ALWAYS\", which means the agent will ask for human input at every step. We set it to \"NEVER\" here.\n",
-    "# `docs_path` is the path to the docs directory. It can also be the path to a single file, or the url to a single file. By default,\n",
-    "# it is set to None, which works only if the collection is already created.\n",
-    "# `task` indicates the kind of task we're working on. In this example, it's a `code` task.\n",
-    "# `chunk_token_size` is the chunk token size for the retrieve chat. By default, it is set to `max_tokens * 0.6`, here we set it to 2000.\n",
-    "# `custom_text_types` is a list of file types to be processed. Default is `autogen.retrieve_utils.TEXT_FORMATS`.\n",
-    "# This only applies to files under the directories in `docs_path`. Explicitly included files and urls will be chunked regardless of their types.\n",
-    "# In this example, we set it to [\"non-existent-type\"] to only process markdown files. Since no \"non-existent-type\" files are included in the `websit/docs`,\n",
-    "# no files there will be processed. However, the explicitly included urls will still be processed.\n",
+    "# Refer to https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/retrieve_user_proxy_agent\n",
+    "# and https://microsoft.github.io/autogen/docs/reference/agentchat/contrib/vectordb/pgvectordb\n",
+    "# for more information on the RetrieveUserProxyAgent and PGVectorDB\n",
     "ragproxyagent = RetrieveUserProxyAgent(\n",
     "    name=\"ragproxyagent\",\n",
     "    human_input_mode=\"NEVER\",\n",
@@ -209,9 +187,7 @@
     "        \"docs_path\": [\n",
     "            \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Examples/Integrate%20-%20Spark.md\",\n",
     "            \"https://raw.githubusercontent.com/microsoft/FLAML/main/website/docs/Research.md\",\n",
-    "            os.path.join(os.path.abspath(\"\"), \"..\", \"website\", \"docs\"),\n",
     "        ],\n",
-    "        \"custom_text_types\": [\"non-existent-type\"],\n",
     "        \"chunk_token_size\": 2000,\n",
     "        \"model\": config_list[0][\"model\"],\n",
     "        \"vector_db\": \"pgvector\",  # PGVector database\n",