diff --git a/flaml/autogen/agent/execution_agent.py b/flaml/autogen/agent/execution_agent.py
index cafae427f9..b8c8ae3922 100644
--- a/flaml/autogen/agent/execution_agent.py
+++ b/flaml/autogen/agent/execution_agent.py
@@ -7,6 +7,10 @@ class ExecutionAgent(Agent):
     An execution agent can only communicate with other agents, and perform actions such as executing a command or code.
     """
 
+    DEFAULT_SYSTEM_MESSAGE = """You are an execution agent. You can only communicate with other agents, and perform actions such as executing a command or code.
+    """
+    AGENT_PREFIX = "execution_agent"
+
     def __init__(self, name, system_message="", work_dir=None):
         super().__init__(name, system_message)
         self._word_dir = work_dir
diff --git a/flaml/autogen/agent/human_agent.py b/flaml/autogen/agent/human_agent.py
new file mode 100644
index 0000000000..8b157fa3d6
--- /dev/null
+++ b/flaml/autogen/agent/human_agent.py
@@ -0,0 +1,25 @@
+from .agent import Agent
+
+
+class HumanAgent(Agent):
+    """Agent that shows each incoming message to a person at the console."""
+
+    DEFAULT_SYSTEM_MESSAGE = """You are human agent. You can give feedback to the sender.
+    """
+    AGENT_PREFIX = "human_agent"
+
+    def receive(self, message, sender):
+        """Pass the message to the base class, print it, and forward any typed reply.
+        An empty line read from standard input means that nothing is sent back.
+        """
+        super().receive(message, sender)
+        print("Human agent received message: ", message)
+        print("Please give feedback to the sender (press enter to skip): ")
+        reply = input()
+        if not reply:
+            return
+        self._send(reply, sender)
+
+    def retrieve_conversation(self, agent_name):
+        """Return the `content` of the last entry stored for `agent_name`."""
+        return self._conversations[agent_name][-1]["content"]
diff --git a/flaml/autogen/agent/math_agent.py b/flaml/autogen/agent/math_agent.py
new file mode 100644
index 0000000000..5472ec6d2b
--- /dev/null
+++ b/flaml/autogen/agent/math_agent.py
@@ -0,0 +1,231 @@
+from .agent import Agent
+from .execution_agent import ExecutionAgent
+from .reflection_agent import ReflectionAgent
+from flaml.autogen.code_utils import DEFAULT_MODEL, FAST_MODEL
+from flaml import oai
+import copy
+from flaml.autogen.code_utils import extract_code
+
+
+class MathAgent(Agent):
+    """Solve a math problem.
+    Most of the code is adopted from the math_solver.py file in Yiran's PR:
+    https://github.com/microsoft/FLAML/blob/ac11d2a7bb91f0f210ce0c67ec7b628d967e27b5/flaml/autogen/math/math_solver.py
+    """
+
+    DEFAULT_SYSTEM_MESSAGE = """You are a helpful assistant.
+    """
+    AGENT_PREFIX = "math_agent"
+
+    DEFAULT_CONFIG = {
+        "model": DEFAULT_MODEL,  # default model is gpt-4
+    }
+    EXECUTION_AGENT_PREFIX = "execution_agent4"
+    SUCCESS_EXIT_CODE = "exitcode: 0\n"
+    REFLECTION_AGENT_PREFIX = "reflection_agent4"
+
+    PROMPTS = {
+        "v3.1python": """Let's use python to solve a math problem.
+            Query requirements:
+            You should always use 'print' function for the output, and use fractions/radical forms instead of decimal.
+            You must following the formats below to write your code (otherwise it will not be recognized):
+            ```python
+            # your code
+            ```
+            First state the key idea to solve the problem. You may choose from 3 ways to solve the problem:
+            Case 1: If possible, write a program to directly solve it. If the problem involves enumerations, try to write a loop to iterate over all situations. Put your reasoning as comments in the code.
+            Case 2: If the problem only involve simple calculations or is mostly reasoning, you can solve it by yourself directly. You can use python to check calculations if necessary.
+            Case 3: If the problem cannot be handled with the two ways above, please follow this process:
+            1. Solve the problem step by step (do not overdivide the steps).
+            2. Take out any queries that can be asked through python (for example, any calculations or equations that can be calculated).
+            3. Wait for me to give the results.
+            4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning.
+            After all the queries are run and you get the answer, put the answer in \\boxed{}.
+            """,
+        "system_python": """You are a helpful assistant who can help users solve math problems.
+            First state the key idea to solve the problem. You may choose from 3 ways to solve the problem:
+            Case 1: If possible, write a program to directly solve it. If the problem involves enumerations, try to write a loop to iterate over all situations. Put your reasoning as comments in the code.
+            Case 2: If the problem only involve simple calculations or is mostly reasoning, you can solve it by yourself directly. You can use python to check calculations if necessary.
+            Case 3: If the problem cannot be handled with the two ways above, please follow this process:
+
+            1. Solve the problem step by step (do not over divide the steps).
+            2. Take out any queries that can be asked through python (for example, any calculations or equations that can be calculated).
+            You must follow the formats below to write your code (otherwise it will not be recognized):
+            ```python
+            # your code
+            ```
+            You should always use 'print' function for the output.
+            3. Wait for the user to execute the python code and return results. Note the user can only execute python code.
+            4. Continue your problem solving if you think the returned result is valid. If the returned result from user is invalid or unexpected, please correct your code or reasoning.
+            After all the steps with code are executed and you get the answer, put the answer in \\boxed{}.
+            """,
+    }
+
+    def __init__(self, name, system_message=DEFAULT_SYSTEM_MESSAGE, work_dir=None, **config):
+        """Create the agent; extra keyword arguments override entries of DEFAULT_CONFIG."""
+        super().__init__(name, system_message)
+        self._work_dir = work_dir
+        self._config = self.DEFAULT_CONFIG.copy()
+        self._config.update(config)
+        self._sender_dict = {}
+
+        # TODO: add key word args for the convenience of experiments (code adopted from Yiran's PR)
+        self.max_round = 20
+        self.prompt_loaction = "user"  # "system" or "user"
+        self.max_invalid_q_per_step = 3
+        self.use_cache = True
+        self.logger = None  # TODO: add logger
+        self.prompt_type = "system_python"
+        self.prompt = MathAgent.PROMPTS[self.prompt_type]
+        # NOTE(review): the solver prompt overwrites `_system_message`, so `system_message` presumably has no effect
+        self._system_message = self.prompt
+        self._file_to_be_saved = "test_math.txt"
+
+        self._seperate_line = "\n" + "-" * 40 + "\n"
+        # to save the list of original senders and problems over time
+        self._original_senders_and_message = []
+
+    def _set_prompt(self, prompt):
+        """Set the prompt for the agent.
+        #TODO: Not using for now. May need to use it in the future.
+        """
+        self.prompt = prompt
+
+    def _save_message_to_file(self, message):
+        """Append `message` to `self._file_to_be_saved`; do nothing when that path is None."""
+        if self._file_to_be_saved is not None:
+            with open(self._file_to_be_saved, "a") as f:
+                f.write(message)  # the file is flushed and closed when the `with` block exits
+
+    @staticmethod
+    def _execution_agent_needed(message):
+        """Check if the execution agent is needed.
+
+        Returns True unless `extract_code` reports the language of `message` as "unknown".
+        """
+        _, lang = extract_code(message)
+        return lang != "unknown"
+
+    def clear_conversation(self, archive_conversation=False):
+        """Clear the conversation history, passing it to `_remember` first if `archive_conversation` is set."""
+        # TODO: do we need to clear self._sender_dict and self._conversations?
+
+        if archive_conversation:
+            self._remember(self._conversations)
+        self._conversations = {}
+
+    def _send_conversation(self, conversation, recipient):
+        """Send a conversation to the recipient."""
+        recipient.receive_conversation(conversation, self)
+
+    def _is_confirmative(self, message):
+        """Check if the message is confirmative.
+
+        Asks the configured model about `message` and returns True when its reply contains "YES".
+        """
+        msg = f"Is the message an confirmative message? Answer YES if it is {message}"
+        res = oai.ChatCompletion.create(
+            messages=[{"content": msg, "role": "user"}], **self._config, use_cache=self.use_cache
+        )
+        response = oai.ChatCompletion.extract_text(res)[0]
+        return "YES" in response
+
+    def receive(self, message, sender):
+        """Pass a message to the base class, query the model, and route the model's reply.
+        A reply for which `_execution_agent_needed` is true goes to an execution agent; any other
+        reply goes to a reflection agent. A reflection-agent message that `_is_confirmative`
+        accepts is relayed to the latest original sender without querying the model again.
+        """
+        if sender.name not in self._conversations or len(self._conversations[sender.name]) == 0:
+            self._sender_dict[sender.name] = sender
+            self._conversations[sender.name] = [{"content": self._system_message, "role": "system"}]
+        # TODO: better not change user's message. Change to a different approach.
+        # E.g., talk to a different agent. "User said: ..."
+        # Senders that are neither execution nor reflection agents are treated as original senders.
+        # There could be multiple turns of conversation between such a sender and the math agent;
+        # each of their messages is appended, and only the latest entry is consulted later on.
+        # TODO: may need to exclude all employed agent
+        if not sender.name.startswith(self.EXECUTION_AGENT_PREFIX) and not sender.name.startswith(
+            self.REFLECTION_AGENT_PREFIX
+        ):
+            # assuming the execution agent does not initiate a conversation with the math agent
+            self._original_senders_and_message.append((sender, message))
+        super().receive(message, sender)
+
+        if sender.name.startswith(self.REFLECTION_AGENT_PREFIX) and self._is_confirmative(message):
+            original_sender = self._original_senders_and_message[-1][0]
+            self._send(message, original_sender)
+            return
+        # Query the model with a deep copy of this sender's history; the stored
+        # conversation object itself is not handed to the API call.
+        messages = copy.deepcopy(self._conversations[sender.name])
+        raw_responses = oai.ChatCompletion.create(messages=messages, **self._config, use_cache=self.use_cache)
+        response = oai.ChatCompletion.extract_text(raw_responses)[0]
+        # echo the exchange to the console
+        print(f"\n Sender {sender.name}: {message}")
+        print(f"\n MATH AGENT: {response}")
+
+        # Replies accepted by `_execution_agent_needed` go to an execution agent; all others are critiqued.
+        if self._execution_agent_needed(response):
+            if sender.name.startswith(self.EXECUTION_AGENT_PREFIX):
+                execution_agent = sender
+            else:
+                # create an execution agent if an execution agent is needed
+                # TODO: should we consider the case where the execution agent is already created in the past?
+                execution_agent = ExecutionAgent(f"{self.EXECUTION_AGENT_PREFIX}{sender.name}", work_dir=self._work_dir)
+                # initialize the conversation
+                self._conversations[execution_agent.name] = self._conversations[sender.name].copy()
+                self._sender_dict[execution_agent.name] = execution_agent
+            # send the response to the execution agent
+            self._send(response, execution_agent)
+        else:
+            if sender.name.startswith(self.REFLECTION_AGENT_PREFIX):
+                reflection_agent = sender
+            else:
+                reflection_agent = ReflectionAgent(
+                    f"{self.REFLECTION_AGENT_PREFIX}{sender.name}", work_dir=self._work_dir
+                )
+                # initialize the conversation
+                self._conversations[reflection_agent.name] = self._conversations[sender.name].copy()
+                self._sender_dict[reflection_agent.name] = reflection_agent
+            # hand over every stored conversation, flattened into one list, plus the new reply,
+            # and then send the reply itself as a regular message
+            merged_list = [item for sublist in self._conversations.values() for item in sublist]
+            merged_list.append({"content": response, "role": "assistant"})
+            self._send_conversation(merged_list, reflection_agent)
+            self._send(response, reflection_agent)
+
+            # NOTE: the reply is not sent to the original sender here. Within this method the
+            # original sender is only contacted once a reflection-agent message is judged
+            # confirmative (see the early return above).
+
+    @staticmethod
+    def _str_splitter(string, length=130):
+        """
+        Add '\n' every 'length' characters to make the output more readable.
+        If at 'length' there is a word, add '\n' before the word.
+        Args:
+            string (str): The input string to be processed.
+            length (int): The maximum number of characters in a line before adding a newline.
+        Returns:
+            str: The processed string with newlines added.
+        """
+
+        words = string.split(" ")
+        current_line = []
+        current_length = 0
+        result = []
+
+        for word in words:
+            if current_line and current_length + len(word) + len(current_line) > length:  # never emit an empty line
+                result.append(" ".join(current_line))
+                current_line = []
+                current_length = 0
+
+            current_line.append(word)
+            current_length += len(word)
+
+        if current_line:
+            result.append(" ".join(current_line))
+
+        return "\n".join(result)
diff --git a/flaml/autogen/agent/reflection_agent.py b/flaml/autogen/agent/reflection_agent.py
new file mode 100644
index 0000000000..132a6ae808
--- /dev/null
+++ b/flaml/autogen/agent/reflection_agent.py
@@ -0,0 +1,42 @@
+from .agent import Agent
+
+# import oai
+from flaml import oai
+from flaml.autogen.code_utils import DEFAULT_MODEL, FAST_MODEL
+
+
+class ReflectionAgent(Agent):
+    """Reflect on the conversation.
+    Try to criticize the sender's message.
+    """
+
+    DEFAULT_CONFIG = {"model": DEFAULT_MODEL}
+    DEFAULT_SYSTEM_MESSAGE = """You are a reflection agent. You try to criticize the sender's message. If you think the message is correct, add YES! before the original answer and return it. Otherwise give constructive suggestion.
+    """
+    AGENT_PREFIX = "reflection_agent"
+
+    def __init__(self, name, system_message="", work_dir=None):
+        super().__init__(name, system_message)
+        self._work_dir = work_dir
+        self._conversations = {}
+        self._system_message = system_message or ReflectionAgent.DEFAULT_SYSTEM_MESSAGE  # empty -> default
+        self._config = ReflectionAgent.DEFAULT_CONFIG.copy()
+        self._sender_dict = {}
+
+    def receive(self, message, sender):
+        """Pass `message` to the base class, ask the model for a critique, and send it to `sender`."""
+        if sender.name not in self._conversations:
+            self._sender_dict[sender.name] = sender
+            self._conversations[sender.name] = [{"content": self._system_message, "role": "system"}]
+        super().receive(message, sender)
+        res = oai.ChatCompletion.create(messages=self._conversations[sender.name], **self._config)
+        critique = oai.ChatCompletion.extract_text(res)[0]
+        print("The critique is ", critique)
+        self._send(critique, sender)
+
+    def receive_conversation(self, conversation, sender):
+        """Append `conversation` to the history kept for `sender`, seeding that history on first contact."""
+        if sender.name not in self._conversations:
+            self._sender_dict[sender.name] = sender
+            self._conversations[sender.name] = [{"content": self._system_message, "role": "system"}]
+        self._conversations[sender.name] = self._conversations[sender.name] + conversation
diff --git a/flaml/autogen/math_utils.py b/flaml/autogen/math_utils.py
index fdf1cfbe8e..18af5e455c 100644
--- a/flaml/autogen/math_utils.py
+++ b/flaml/autogen/math_utils.py
@@ -1,5 +1,6 @@
 from typing import Optional
 from flaml.autogen import oai, DEFAULT_MODEL
+import re
 
 _MATH_PROMPT = "{problem} Solve the problem carefully. Simplify your answer as much as possible. Put the final answer in \\boxed{{}}."
 _MATH_CONFIG = {
@@ -342,3 +343,21 @@ def eval_math_responses(responses, solution=None, **args):
         "voted_answer": responses[answer],
         "votes": votes,
     }
+
+
+def remove_asy_sections(input_string):
+    """Remove asy sections from the input string.
+
+    A section starts with ``[asy]`` or ``[ASY]`` and ends at the nearest closing tag of the
+    same case, written with either a slash or a backslash. Sections are removed one by one,
+    so text lying between two sections is kept even if their closing styles differ.
+
+    Args:
+        input_string (str): The input string.
+    Returns:
+        str: The string without asy sections.
+    """
+    # one pass over a single pattern; the backreference keeps the closing tag's case
+    # equal to the opening tag's, and DOTALL lets a section span several lines
+    pattern = r"\[(asy|ASY)\].*?\[[\\/]\1\]"
+    return re.sub(pattern, "", input_string, flags=re.DOTALL)
diff --git a/test/autogen/test_math_agent.py b/test/autogen/test_math_agent.py
new file mode 100644
index 0000000000..caddb8dca2
--- /dev/null
+++ b/test/autogen/test_math_agent.py
@@ -0,0 +1,70 @@
+from flaml.autogen.agent.math_agent import MathAgent
+from flaml.autogen.agent.human_agent import HumanAgent
+from flaml.autogen.math_utils import eval_math_responses, get_answer
+
+
+if __name__ == "__main__":
+    import openai
+    # path of the file that holds the OpenAI API key
+    openai.api_key_path = "test/openai/key.txt"
+    user_agent = HumanAgent("human agent")
+    math_agent = MathAgent("math_agent")
+    problems = [
+        {
+            "problem": "solve the equation x^3=125",
+            "solution": "\\boxed{x=5}",
+        },
+        {
+            "problem": "What is the sum of all positive integers $r$ that satisfy $$\\mathop{\\text{lcm}}[r,700] = 7000~?$$",
+            "level": "Level 5",
+            "type": "Number Theory",
+            "solution": "Note the prime factorizations $700=2^2\\cdot 5^2\\cdot 7$ and $7000=2^3\\cdot 5^3\\cdot 7$.\n\nIf $\\mathop{\\text{lcm}}[r,700]=7000$, then in particular, $r$ is a divisor of $7000$, so we can write $r=2^\\alpha\\cdot 5^\\beta\\cdot 7^\\gamma$, where $0\\le\\alpha\\le 3$, $0\\le\\beta\\le 3$, and $0\\le\\gamma\\le 1$.\n\nMoreover, we know that $\\mathop{\\text{lcm}}[r,700]=2^{\\max\\{\\alpha,2\\}}\\cdot 5^{\\max\\{\\beta,2\\}}\\cdot 7^{\\max\\{\\gamma,1\\}}$, and we know that this is equal to $7000=2^3\\cdot 5^3\\cdot 7$. This is possible only if $\\alpha=3$ and $\\beta=3$, but $\\gamma$ can be $0$ or $1$, giving us two choices for $r$: $$r = 2^3\\cdot 5^3\\cdot 7^0 = 1000 \\text{~~or~~} r=2^3\\cdot 5^3\\cdot 7^1 = 7000.$$So the sum of all solutions is $1000+7000=\\boxed{8000}$.",
+            "problem_id": "6",
+            "is_valid_reply": True,
+            "is_correct": False,
+            "correct_ans": "8000",
+            "voted_answer": "1440",
+        },
+        {
+            "problem": "Find the value of $a_2+a_4+a_6+a_8+\\dots+a_{98}$ if $a_1, a_2, a_3, \\ldots$ is an arithmetic progression with common difference $1$ and \\[a_1+a_2+a_3+\\dots+a_{98}=137.\\]",
+            "level": "Level 5",
+            "type": "Algebra",
+            "solution": "Let $S = a_1 + a_3 + \\dots + a_{97}$ and $T = a_2 + a_4 + \\dots + a_{98}$. Then the given equation states that $S + T = 137$, and we want to find $T$.\n\nWe can build another equation relating $S$ and $T$: note that \\[\\begin{aligned} T-S &= (a_2-a_1) + (a_4-a_3) + \\dots + (a_{98}-a_{97}) \\\\ &= \\underbrace{1 + 1 + \\dots + 1}_{49 \\text{ times }} \\\\ &= 49 \\end{aligned}\\]since $(a_n)$ has common difference $1$. Then, adding the two equations $S+T=137$ and $T-S=49$, we get $2T=137+49=186$, so $T = \\tfrac{186}{2} = \\boxed{93}$.",
+            "problem_id": "1",
+            "is_valid_reply": True,
+            "is_correct": False,
+            "correct_ans": "93",
+            "voted_answer": "1269",
+            "round": 6.5,
+        },
+        {
+            "problem": "Find all numbers $a$ for which the graph of $y=x^2+a$ and the graph of $y=ax$ intersect. Express your answer in interval notation.",
+            "level": "Level 5",
+            "type": "Algebra",
+            "solution": "If these two graphs intersect then the points of intersection occur when  \\[x^2+a=ax,\\] or  \\[x^2-ax+a=0.\\] This quadratic has solutions exactly when the discriminant is nonnegative: \\[(-a)^2-4\\cdot1\\cdot a\\geq0.\\] This simplifies to  \\[a(a-4)\\geq0.\\] This quadratic (in $a$) is nonnegative when $a$ and $a-4$ are either both $\\ge 0$ or both $\\le 0$. This is true for $a$ in $$(-\\infty,0]\\cup[4,\\infty).$$ Therefore the line and quadratic intersect exactly when $a$ is in $\\boxed{(-\\infty,0]\\cup[4,\\infty)}$.",
+            "problem_id": "6",
+            "is_valid_reply": True,
+            "is_correct": False,
+            "correct_ans": "(-\\infty,0]\\cup[4,\\infty)",
+            "voted_answer": "a=0",
+            "round": 2.5,
+            "valid_q_count": 1,
+            "total_q_count": 1,
+        },
+    ]
+
+    for problem in problems:
+        print("\n Hello! math_agent is solving this problem: {problem}".format(problem=problem))
+        # start every problem from an empty conversation history
+        math_agent.clear_conversation()
+        math_agent.receive(message=problem["problem"], sender=user_agent)
+        # last entry the human agent stored for its conversation with "math_agent"
+        result = user_agent.retrieve_conversation("math_agent")
+        # a plain string is used as is; otherwise the answer is read from "response_with_ans"
+        result_with_ans = result if isinstance(result, str) else result["response_with_ans"]
+        metrics = eval_math_responses([result_with_ans], problem["solution"])
+        # reference answer extracted from the stored solution
+        correct_ans = get_answer(problem["solution"])
+        print("answer:", result_with_ans)
+        print("\n correct answer is:", correct_ans)
+        print("metrics:", metrics)