From ca914cc02696d43c339745ea60da86ebed825812 Mon Sep 17 00:00:00 2001 From: Aymeric Date: Fri, 28 Jun 2024 19:44:55 +0200 Subject: [PATCH 1/4] Adds final answer tool for all agents --- docs/source/en/agents.md | 11 +++++++++-- src/transformers/agents/agents.py | 3 +-- src/transformers/agents/default_tools.py | 2 +- src/transformers/agents/prompts.py | 20 ++++++++++++++------ src/transformers/agents/tools.py | 1 - 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/docs/source/en/agents.md b/docs/source/en/agents.md index 2cacaed5902..3c805a7876d 100644 --- a/docs/source/en/agents.md +++ b/docs/source/en/agents.md @@ -188,7 +188,7 @@ You can still authorize additional imports by passing the authorized modules as >>> from transformers import ReactCodeAgent >>> agent = ReactCodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4']) ->>>agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?") +>>> agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?") (...) 'Hugging Face – Blog' @@ -256,6 +256,13 @@ agent = ReactJsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_cus > Please make sure to define the `<>` string somewhere in the `template` so the agent is aware of the available tools. + +### Inspecting an agent run + +Here are a few useful attributes to inspect what happened after a run: +- `agent.logs` stores the fine-grained logs of the agent. At every step of the agent's run, everything gets stored in a dictionary that then is appended to `agent.logs`. +- Running `agent.write_inner_memory_from_logs()` creates an inner memory of the agent's logs for the LLM to view, as a list of chat messages. 
This method goes over each step of the log and only stores what it's interested in as a message: for instance, it will save the system prompt and task in separate messages, then for each step it will store the LLM output as a message, and the tool call output as another message. Use this if you want a higher-level view of what has happened - but not every log will be transcribed by this method. + ## Tools A tool is an atomic function to be used by an agent. @@ -379,7 +386,7 @@ And the output: `"The most downloaded model for the 'text-to-video' task is ByteDance/AnimateDiff-Lightning."` -### Manage agent toolbox +### Manage your agent's toolbox If you have already initialized an agent, it is inconvenient to reinitialize it from scratch with a tool you want to use. With Transformers, you can manage an agent's toolbox by adding or replacing a tool. diff --git a/src/transformers/agents/agents.py b/src/transformers/agents/agents.py index 63a2c3889ba..fa066b83846 100644 --- a/src/transformers/agents/agents.py +++ b/src/transformers/agents/agents.py @@ -169,6 +169,7 @@ def __init__(self, tools: List[Tool], add_base_tools: bool = False): self._tools = {tool.name: tool for tool in tools} if add_base_tools: self.add_base_tools() + self.add_tool(FinalAnswerTool()) self._load_tools_if_needed() def add_base_tools(self, add_python_interpreter: bool = False): @@ -631,8 +632,6 @@ def __init__( tool_description_template=tool_description_template, **kwargs, ) - if "final_answer" not in self._toolbox.tools: - self._toolbox.add_tool(FinalAnswerTool()) def provide_final_answer(self, task) -> str: """ diff --git a/src/transformers/agents/default_tools.py b/src/transformers/agents/default_tools.py index 6ab971a4803..4a6f6dad3dc 100644 --- a/src/transformers/agents/default_tools.py +++ b/src/transformers/agents/default_tools.py @@ -180,7 +180,7 @@ def forward(self, code): class FinalAnswerTool(Tool): name = "final_answer" - description = "Provides a final answer to the given problem" + 
description = "Provides a final answer to the given problem." inputs = {"answer": {"type": "text", "description": "The final answer to the problem"}} output_type = "any" diff --git a/src/transformers/agents/prompts.py b/src/transformers/agents/prompts.py index 3a867e8dc9b..fe431c4e215 100644 --- a/src/transformers/agents/prompts.py +++ b/src/transformers/agents/prompts.py @@ -52,8 +52,9 @@ def download_prompt(prompt_or_repo_id, agent_name, mode="run"): To help you, I will give you access to a set of tools that you can use. Each tool is a Python function and has a description explaining the task it performs, the inputs it expects and the outputs it returns. You should first explain which tool you will use to perform the task and for what reason, then write the code in Python. Each instruction in Python should be a simple assignment. You can print intermediate results if it makes sense to do so. +In the end, use tool 'final_answer' to return your answer, its argument will be what gets returned. You can use imports in your code, but only from the following list of modules: <> -Be sure to provide a 'Code:' token, else the system will be stuck in a loop. +Be sure to provide a 'Code:' token, else the run will fail. 
Tools: <> @@ -68,7 +69,7 @@ def download_prompt(prompt_or_repo_id, agent_name, mode="run"): translated_question = translator(question=question, src_lang="French", tgt_lang="English") print(f"The translated question is {translated_question}.") answer = image_qa(image=image, question=translated_question) -print(f"The answer is {answer}") +final_answer(f"The answer is {answer}") ``` --- @@ -80,6 +81,7 @@ def download_prompt(prompt_or_repo_id, agent_name, mode="run"): answer = document_qa(document, question="What is the oldest person?") print(f"The answer is {answer}.") image = image_generator(answer) +final_answer(image) ``` --- @@ -89,6 +91,7 @@ def download_prompt(prompt_or_repo_id, agent_name, mode="run"): Code: ```py image = image_generator(prompt=caption) +final_answer(image) ``` --- @@ -100,6 +103,7 @@ def download_prompt(prompt_or_repo_id, agent_name, mode="run"): summarized_text = summarizer(text) print(f"Summary: {summarized_text}") audio_summary = text_reader(summarized_text) +final_answer(audio_summary) ``` --- @@ -111,6 +115,7 @@ def download_prompt(prompt_or_repo_id, agent_name, mode="run"): answer = text_qa(text=text, question=question) print(f"The answer is {answer}.") image = image_generator(answer) +final_answer(image) ``` --- @@ -120,10 +125,11 @@ def download_prompt(prompt_or_repo_id, agent_name, mode="run"): Code: ```py caption = image_captioner(image) +final_answer(caption) ``` --- -Above example were using tools that might not exist for you. You only have access to those Tools: +Above example were using tools that might not exist for you. You only have access to those Tools: <> Remember to make sure that variables you use are all defined. @@ -145,7 +151,7 @@ def download_prompt(prompt_or_repo_id, agent_name, mode="run"): "action_input": $INPUT } -Make sure to have the $INPUT as a dictionary in the right format for the tool you are using, and do not put variable names as input if you can find the right values. 
+Make sure to have the $INPUT as a dictionnary in the right format for the tool you are using, and do not put variable names as input if you can find the right values. You should ALWAYS use the following format: @@ -250,7 +256,7 @@ def download_prompt(prompt_or_repo_id, agent_name, mode="run"): } -Above example were using notional tools that might not exist for you. You only have access to those tools: +Above example were using notional tools that might not exist for you. You only have access to those tools: <> Here are the rules you should always follow to solve your task: @@ -357,7 +363,9 @@ def download_prompt(prompt_or_repo_id, agent_name, mode="run"): 4. Take care to not chain too many sequential tool calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another tool call that depends on its output in the same block: rather output results with print() to use them in the next block. 5. Call a tool only when needed, and never re-do a tool call that you previously did with the exact same parameters. 6. Don't name any new variable with the same name as a tool: for instance don't name a variable 'final_answer'. -7. You can use imports in your code, but only from the following list of modules: <> +7. Never create any notional variables in our code, as having these in your logs might derail you from the true variables. +8. You can use imports in your code, but only from the following list of modules: <> +9. Don't give up! You're in charge of solving the task, not providing directions to solve it. Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000. 
""" diff --git a/src/transformers/agents/tools.py b/src/transformers/agents/tools.py index b2821aa5c57..b8b20d8e0e6 100644 --- a/src/transformers/agents/tools.py +++ b/src/transformers/agents/tools.py @@ -650,7 +650,6 @@ def fn(*args, **kwargs): "text-to-speech": "TextToSpeechTool", "translation": "TranslationTool", "python_interpreter": "PythonInterpreterTool", - "final_answer": "FinalAnswerTool", } From f0819d720a184fe5f237978ab49bd0832fb5af8e Mon Sep 17 00:00:00 2001 From: Aymeric Date: Mon, 1 Jul 2024 19:39:13 +0200 Subject: [PATCH 2/4] Typo --- src/transformers/agents/prompts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/agents/prompts.py b/src/transformers/agents/prompts.py index fe431c4e215..515ce3d439a 100644 --- a/src/transformers/agents/prompts.py +++ b/src/transformers/agents/prompts.py @@ -151,7 +151,7 @@ def download_prompt(prompt_or_repo_id, agent_name, mode="run"): "action_input": $INPUT } -Make sure to have the $INPUT as a dictionnary in the right format for the tool you are using, and do not put variable names as input if you can find the right values. +Make sure to have the $INPUT as a dictionary in the right format for the tool you are using, and do not put variable names as input if you can find the right values. 
You should ALWAYS use the following format: From c060796198308a6377a990f7e1944080234eab96 Mon Sep 17 00:00:00 2001 From: Aymeric Date: Tue, 2 Jul 2024 11:33:10 +0200 Subject: [PATCH 3/4] Add clarification in doc --- docs/source/en/agents.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/agents.md b/docs/source/en/agents.md index 3c805a7876d..d1c550f5d32 100644 --- a/docs/source/en/agents.md +++ b/docs/source/en/agents.md @@ -50,7 +50,7 @@ We implement two versions of ReactJsonAgent: ![Framework of a React Agent](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/open-source-llms-as-agents/ReAct.png) -For example, here is how a ReAct agent would work its way through the following question. +For example, here is how a ReAct Code agent would work its way through the following question. ```py3 >>> agent.run( From 960fd3347466c1099f0da8043c02b4353bb9f71a Mon Sep 17 00:00:00 2001 From: Aymeric Date: Tue, 2 Jul 2024 11:39:35 +0200 Subject: [PATCH 4/4] Put final_answer tool addition in agent for clarity --- src/transformers/agents/agents.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/agents/agents.py b/src/transformers/agents/agents.py index fa066b83846..d1756d2e9e3 100644 --- a/src/transformers/agents/agents.py +++ b/src/transformers/agents/agents.py @@ -169,7 +169,6 @@ def __init__(self, tools: List[Tool], add_base_tools: bool = False): self._tools = {tool.name: tool for tool in tools} if add_base_tools: self.add_base_tools() - self.add_tool(FinalAnswerTool()) self._load_tools_if_needed() def add_base_tools(self, add_python_interpreter: bool = False): @@ -338,6 +337,7 @@ def __init__( self._toolbox.add_base_tools(add_python_interpreter=(self.__class__ == ReactJsonAgent)) else: self._toolbox = Toolbox(tools, add_base_tools=add_base_tools) + self._toolbox.add_tool(FinalAnswerTool()) self.system_prompt = format_prompt_with_tools( self._toolbox, 
self.system_prompt_template, self.tool_description_template