class HumanAgent(Agent):
    """Agent that shows each incoming message to a person and relays their reply."""

    DEFAULT_SYSTEM_MESSAGE = """You are human agent. You can give feedback to the sender.
    """
    AGENT_PREFIX = "human_agent"

    def receive(self, message, sender):
        """Record a message from ``sender`` and ask the human for feedback.

        The message is first handed to the base class ``receive``, then printed.
        The human is prompted on standard input; a non-empty reply is sent back
        to ``sender`` through ``self._send``, while an empty reply (just pressing
        enter) sends nothing.
        """
        super().receive(message, sender)
        print("Human agent received message: ", message)
        # The prompt is printed on its own line before reading standard input.
        print("Please give feedback to the sender (press enter to skip): ")
        reply = input()
        if not reply:
            # Empty input means the human chose to skip giving feedback.
            return
        self._send(reply, sender)

    def retrieve_conversation(self, agent_name):
        """Return the content of the latest message in the conversation with ``agent_name``."""
        history = self._conversations[agent_name]
        latest_entry = history[-1]
        return latest_entry["content"]
If the problem involves enumerations, try to write a loop to iterate over all situations. Put your reasoning as comments in the code. + Case 2: If the problem only involve simple calculations or is mostly reasoning, you can solve it by yourself directly. You can use python to check calculations if necessary. + Case 3: If the problem cannot be handled with the two ways above, please follow this process: + 1. Solve the problem step by step (do not overdivide the steps). + 2. Take out any queries that can be asked through python (for example, any calculations or equations that can be calculated). + 3. Wait for me to give the results. + 4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning. + After all the queries are run and you get the answer, put the answer in \\boxed{}. + """, + "system_python": """You are a helpful assistant who can help users solve math problems. + First state the key idea to solve the problem. You may choose from 3 ways to solve the problem: + Case 1: If possible, write a program to directly solve it. If the problem involves enumerations, try to write a loop to iterate over all situations. Put your reasoning as comments in the code. + Case 2: If the problem only involve simple calculations or is mostly reasoning, you can solve it by yourself directly. You can use python to check calculations if necessary. + Case 3: If the problem cannot be handled with the two ways above, please follow this process: + + 1. Solve the problem step by step (do not over divide the steps). + 2. Take out any queries that can be asked through python (for example, any calculations or equations that can be calculated). + You must follow the formats below to write your code (otherwise it will not be recognized): + ```python + # your code + ``` + You should always use 'print' function for the output. + 3. Wait for the user to execute the python code and return results. 
@staticmethod
def _execution_agent_needed(message):
    """Tell whether ``message`` should be handed to an execution agent.

    The message is passed to ``extract_code``; the execution agent is needed
    unless the language it reports is ``"unknown"``.
    """
    _code, detected_lang = extract_code(message)
    return detected_lang != "unknown"
def _is_confirmative(self, message):
    """Ask the chat model whether ``message`` is a confirmative message.

    A single user-role question embedding ``message`` is sent with this agent's
    ``self._config`` and ``self.use_cache``. Returns True when the first
    extracted response text contains ``"YES"``, otherwise False.
    """
    question = f"Is the message an confirmative message? Answer YES if it is {message}"
    chat_messages = [{"content": question, "role": "user"}]
    completion = oai.ChatCompletion.create(messages=chat_messages, **self._config, use_cache=self.use_cache)
    first_reply = oai.ChatCompletion.extract_text(completion)[0]
    return "YES" in first_reply
+ # prompted_message = self.prompt + "\n Problem: " + message # TODO: pay attention to the executation agent + prompted_message = message + else: + prompted_message = message + # if the sender is the execution agent, then we need to save the original sender and problem + # there could be multiple turns of conversation between the master agent and the math agent, + # we only need to save the original sender and problem once + # is_session_starts = len(self._conversations[sender.name]) == 1 + # TODO: may need to exclude all employed agent + if not sender.name.startswith(self.EXECUTION_AGENT_PREFIX) and not sender.name.startswith( + self.REFLECTION_AGENT_PREFIX + ): + # assuming the execution agent does not initiate a conversation with the math agent + self._original_senders_and_message.append((sender, message)) + super().receive(prompted_message, sender) + + if sender.name.startswith(self.REFLECTION_AGENT_PREFIX) and self._is_confirmative(message): + original_sender = self._original_senders_and_message[-1][0] + self._send(message, original_sender) + return + # # if the sender is the reflection agent, then we need to send the response to the original sender + # else: + # self._send(response, reflection_agent) + # save a readable conversation in txt file + # self._save_message_to_file(f"Problem: {self._str_splitter(prompted_question)}\n {self._seperate_line}") + messages = copy.deepcopy(self._conversations[sender.name]) + raw_responses = oai.ChatCompletion.create(messages=messages, **self._config, use_cache=self.use_cache) + response = oai.ChatCompletion.extract_text(raw_responses)[0] + print(f"\n Sender {sender.name}: {message}") + print(f"\n MATH AGENT: {response}") + + original_sender, _ = self._original_senders_and_message[-1] + if self._execution_agent_needed(response): + if sender.name.startswith(self.EXECUTION_AGENT_PREFIX): + excution_agent = sender + else: + # create an execution agent if an execution agent is needed + # TODO: should we consider the case where 
@staticmethod
def _str_splitter(string, length=130):
    """Wrap a string onto several lines at word boundaries for readability.

    The input is split on single spaces and the words are packed greedily onto
    a line. A line break is inserted before the word that would push the line
    (words plus the spaces joining them) past ``length`` characters. A word
    that is longer than ``length`` on its own is never split; it is placed on
    a line by itself.

    Args:
        string (str): The input string to be processed.
        length (int): The maximum number of characters in a line before a
            line break is added.

    Returns:
        str: The processed string with line breaks added.
    """
    lines = []
    current_line = []
    current_length = 0  # characters of the words on the current line, spaces excluded

    for word in string.split(" "):
        # len(current_line) is the number of joining spaces the line would
        # contain once `word` is appended to it.
        would_overflow = current_length + len(word) + len(current_line) > length
        # Flush only a non-empty line: if the very first word is already longer
        # than `length`, flushing the empty line would emit a spurious leading
        # line break.
        if current_line and would_overflow:
            lines.append(" ".join(current_line))
            current_line = []
            current_length = 0

        current_line.append(word)
        current_length += len(word)

    if current_line:
        lines.append(" ".join(current_line))

    return "\n".join(lines)
+ """ + AGENT_PREFIX = "reflection_agent" + + def __init__(self, name, system_message="", work_dir=None): + super().__init__(name, system_message) + self._word_dir = work_dir + self._conversations = {} + self._system_message = ReflectionAgent.DEFAULT_SYSTEM_MESSAGE + self._config = ReflectionAgent.DEFAULT_CONFIG.copy() + self._sender_dict = {} + + def receive(self, message, sender): + if sender.name not in self._conversations: + self._sender_dict[sender.name] = sender + self._conversations[sender.name] = [{"content": self._system_message, "role": "system"}] + super().receive(message, sender) + res = oai.ChatCompletion.create(messages=self._conversations[sender.name], **self._config) + critique = oai.ChatCompletion.extract_text(res)[0] + print("The critique is ", critique) + self._send(critique, sender) + + def receive_conversation(self, conversation, sender): + if sender.name not in self._conversations: + self._conversations[sender.name] = conversation + else: + self._conversations[sender.name] = self._conversations[sender.name] + conversation diff --git a/flaml/autogen/math_utils.py b/flaml/autogen/math_utils.py index fdf1cfbe8e..18af5e455c 100644 --- a/flaml/autogen/math_utils.py +++ b/flaml/autogen/math_utils.py @@ -1,5 +1,6 @@ from typing import Optional from flaml.autogen import oai, DEFAULT_MODEL +import re _MATH_PROMPT = "{problem} Solve the problem carefully. Simplify your answer as much as possible. Put the final answer in \\boxed{{}}." _MATH_CONFIG = { @@ -342,3 +343,21 @@ def eval_math_responses(responses, solution=None, **args): "voted_answer": responses[answer], "votes": votes, } + + +def remove_asy_sections(input_string): + """Remove asy sections from the input string. + Args: + input_string (str): The input string. + Returns: + str: The string without asy sections. 
+ """ + pattern = r"\[asy\](.*?)\[\\asy\]" + output_string = re.sub(pattern, "", input_string, flags=re.DOTALL) + pattern = r"\[asy\](.*?)\[/asy\]" + output_string = re.sub(pattern, "", output_string, flags=re.DOTALL) + pattern = r"\[ASY\](.*?)\[\\ASY\]" + output_string = re.sub(pattern, "", output_string, flags=re.DOTALL) + pattern = r"\[ASY\](.*?)\[/ASY\]" + output_string = re.sub(pattern, "", output_string, flags=re.DOTALL) + return output_string diff --git a/test/autogen/test_math_agent.py b/test/autogen/test_math_agent.py new file mode 100644 index 0000000000..caddb8dca2 --- /dev/null +++ b/test/autogen/test_math_agent.py @@ -0,0 +1,70 @@ +from flaml.autogen.agent.math_agent import MathAgent +from flaml.autogen.agent.human_agent import HumanAgent +from flaml.autogen.math_utils import eval_math_responses, get_answer + + +if __name__ == "__main__": + import openai + + openai.api_key_path = "test/openai/key.txt" + user_agent = HumanAgent("human agent") + math_agent = MathAgent("math_agent") + problems = [ + { + "problem": "solve the equation x^3=125", + "solution": "\\boxed{x=5}", + }, + { + "problem": "What is the sum of all positive integers $r$ that satisfy $$\\mathop{\\text{lcm}}[r,700] = 7000~?$$", + "level": "Level 5", + "type": "Number Theory", + "solution": "Note the prime factorizations $700=2^2\\cdot 5^2\\cdot 7$ and $7000=2^3\\cdot 5^3\\cdot 7$.\n\nIf $\\mathop{\\text{lcm}}[r,700]=7000$, then in particular, $r$ is a divisor of $7000$, so we can write $r=2^\\alpha\\cdot 5^\\beta\\cdot 7^\\gamma$, where $0\\le\\alpha\\le 3$, $0\\le\\beta\\le 3$, and $0\\le\\gamma\\le 1$.\n\nMoreover, we know that $\\mathop{\\text{lcm}}[r,700]=2^{\\max\\{\\alpha,2\\}}\\cdot 5^{\\max\\{\\beta,2\\}}\\cdot 7^{\\max\\{\\gamma,1\\}}$, and we know that this is equal to $7000=2^3\\cdot 5^3\\cdot 7$. 
This is possible only if $\\alpha=3$ and $\\beta=3$, but $\\gamma$ can be $0$ or $1$, giving us two choices for $r$: $$r = 2^3\\cdot 5^3\\cdot 7^0 = 1000 \\text{~~or~~} r=2^3\\cdot 5^3\\cdot 7^1 = 7000.$$So the sum of all solutions is $1000+7000=\\boxed{8000}$.", + "problem_id": "6", + "is_valid_reply": True, + "is_correct": False, + "correct_ans": "8000", + "voted_answer": "1440", + }, + { + "problem": "Find the value of $a_2+a_4+a_6+a_8+\\dots+a_{98}$ if $a_1, a_2, a_3, \\ldots$ is an arithmetic progression with common difference $1$ and \\[a_1+a_2+a_3+\\dots+a_{98}=137.\\]", + "level": "Level 5", + "type": "Algebra", + "solution": "Let $S = a_1 + a_3 + \\dots + a_{97}$ and $T = a_2 + a_4 + \\dots + a_{98}$. Then the given equation states that $S + T = 137$, and we want to find $T$.\n\nWe can build another equation relating $S$ and $T$: note that \\[\\begin{aligned} T-S &= (a_2-a_1) + (a_4-a_3) + \\dots + (a_{98}-a_{97}) \\\\ &= \\underbrace{1 + 1 + \\dots + 1}_{49 \\text{ times }} \\\\ &= 49 \\end{aligned}\\]since $(a_n)$ has common difference $1$. Then, adding the two equations $S+T=137$ and $T-S=49$, we get $2T=137+49=186$, so $T = \\tfrac{186}{2} = \\boxed{93}$.", + "problem_id": "1", + "is_valid_reply": True, + "is_correct": False, + "correct_ans": "93", + "voted_answer": "1269", + "round": 6.5, + }, + { + "problem": "Find all numbers $a$ for which the graph of $y=x^2+a$ and the graph of $y=ax$ intersect. Express your answer in interval notation.", + "level": "Level 5", + "type": "Algebra", + "solution": "If these two graphs intersect then the points of intersection occur when \\[x^2+a=ax,\\] or \\[x^2-ax+a=0.\\] This quadratic has solutions exactly when the discriminant is nonnegative: \\[(-a)^2-4\\cdot1\\cdot a\\geq0.\\] This simplifies to \\[a(a-4)\\geq0.\\] This quadratic (in $a$) is nonnegative when $a$ and $a-4$ are either both $\\ge 0$ or both $\\le 0$. 
This is true for $a$ in $$(-\\infty,0]\\cup[4,\\infty).$$ Therefore the line and quadratic intersect exactly when $a$ is in $\\boxed{(-\\infty,0]\\cup[4,\\infty)}$.", + "problem_id": "6", + "is_valid_reply": True, + "is_correct": False, + "correct_ans": "(-\\infty,0]\\cup[4,\\infty)", + "voted_answer": "a=0", + "round": 2.5, + "valid_q_count": 1, + "total_q_count": 1, + }, + ] + + for i, problem in enumerate(problems): + print("\n Hello! math_agent is solving this problem: {problem}".format(problem=problem)) + # send the problem to the math agent add the option to clear past conversation + math_agent.clear_conversation() + math_agent.receive(message=problem["problem"], sender=user_agent) + # get the answer from the math agent + result = user_agent.retrieve_conversation("math_agent") + # evaluate how good the answer is + result_with_ans = result if isinstance(result, str) else result["response_with_ans"] + metrics = eval_math_responses([result_with_ans], problem["solution"]) + # get the result + correct_ans = get_answer(problem["solution"]) + print("answer:", result_with_ans) + print("\n correct answer is:", correct_ans) + print("metrics:", metrics)