From dc21cdab28aa5f2159948e6da816423e11fce2d3 Mon Sep 17 00:00:00 2001
From: mormio <morganemoss@gmail.com>
Date: Mon, 4 Nov 2024 10:39:03 -0500
Subject: [PATCH 1/3] reasoning code

---
 froggy/tools/reasoning.py | 48 +++++++++++++++++++++++++++++++++++++++
 scripts/run.py            |  4 ++++
 2 files changed, 52 insertions(+)
 create mode 100644 froggy/tools/reasoning.py
diff --git a/froggy/tools/reasoning.py b/froggy/tools/reasoning.py
new file mode 100644
index 000000000..f9359d2ab
--- /dev/null
+++ b/froggy/tools/reasoning.py
@@ -0,0 +1,48 @@
+from froggy.tools import EnvironmentTool
+
+
+class ReasoningTool(EnvironmentTool):
+    name: str = "reasoning"
+    action: str = "<reasoning>"
+    description: str = "Preface any action with explicit reasoning tokens."
+
+    @property
+    def instructions(self):
+        assert hasattr(self, "environment")
+        instruction = {
+            "template": "<reasoning> ... </reasoning>",
+            "description": """You may explicitly reason about the current state and the best next action before taking it. You follow a particular reasoning style: 
+You break down complex problems into smaller parts and reason through them step by step, arriving at the best next action before then executing it. You should follow your reasoning 
+with your next action.""",
+            "examples": [
+                "<reasoning> The execution trace points to line 43 in main.py, so I'll place a breakpoint there.</reasoning> ```pdb b 43",
+                "<reasoning> There's a shape mismatch that corresponds to a matrix transpose, so I'll rewrite the function to account for the transpose. </reasoning> ```rewrite ....",
+            ],
+        }
+        return instruction
+
+    def register(self, environment):
+        from autopdb.envs import RepoEnv
+
+        if not isinstance(environment, RepoEnv):
+            raise ValueError("The environment must be a RepoEnv instance.")
+
+        self.environment = environment
+
+    def is_triggered(self, action):
+        return action.startswith(self.action)
+
+    def use(self, action):
+        """Reasoning tokens are only to benefit the model, so we strip them and then pass the remainder of the action
+        as a free next action.
+        """
+        remaining_action = self.remove_reasoning(action)
+        # now execute the next action
+        next_action_obs = self.environment.step(remaining_action)
+        if next_action_obs == f"Invalid action: {action}.":
+            next_action_obs == f"You must provide a valid action after your reasoning. Found invalid action: {action}."
+        return next_action_obs[0]
+
+    def remove_reasoning(self, action):
+        items = action.split("</reasoning>")
+        return " ".join(items[1:]).lstrip()
\ No newline at end of file
diff --git a/scripts/run.py b/scripts/run.py
index 22c09fd4b..8b41ca49b 100644
--- a/scripts/run.py
+++ b/scripts/run.py
@@ -55,6 +55,10 @@ def main():
             env.add_tool(
                 PDBTool(persistent_breakpoints=config["persistent_breakpoints"])
             )
+        elif "reasoning" == tool:
+            from froggy.tools.reasoning import ReasoningTool
+
+            env.add_tool(ReasoningTool())
         elif tool.startswith("patcher"):
             from froggy.tools.patchers import CodePatcher
 

From 435f70693568544e52ed7a02e3fdb71bec2b195e Mon Sep 17 00:00:00 2001
From: mormio <morganemoss@gmail.com>
Date: Mon, 4 Nov 2024 13:15:37 -0500
Subject: [PATCH 2/3] add option to use without a chained action

---
 froggy/tools/reasoning.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/froggy/tools/reasoning.py b/froggy/tools/reasoning.py
index f9359d2ab..c4b651118 100644
--- a/froggy/tools/reasoning.py
+++ b/froggy/tools/reasoning.py
@@ -20,9 +20,13 @@ def instructions(self):
             ],
         }
         return instruction
+    
+    def __init__(self, allow_chain_action: bool = False):
+        super().__init__()
+        self.allow_chain_action = allow_chain_action
 
     def register(self, environment):
-        from autopdb.envs import RepoEnv
+        from froggy.envs import RepoEnv
 
         if not isinstance(environment, RepoEnv):
             raise ValueError("The environment must be a RepoEnv instance.")
@@ -31,8 +35,14 @@ def register(self, environment):
 
     def is_triggered(self, action):
         return action.startswith(self.action)
-
+    
     def use(self, action):
+        """Run the chained follow-up action when enabled; otherwise only acknowledge the reasoning."""
+        if not self.allow_chain_action:
+            return self.use_without_chaining()
+        return self.use_with_chaining(action)
+
+    def use_with_chaining(self, action):
         """Reasoning tokens are only to benefit the model, so we strip them and then pass the remainder of the action
         as a free next action.
         """
@@ -42,6 +52,9 @@ def use(self, action):
         if next_action_obs == f"Invalid action: {action}.":
             next_action_obs == f"You must provide a valid action after your reasoning. Found invalid action: {action}."
         return next_action_obs[0]
+    
+    def use_without_chaining(self):
+        return "Reasoning text acknowledged."
 
     def remove_reasoning(self, action):
         items = action.split("</reasoning>")

From 10c9537c887c952db97e182718571eb3e9e6b9a4 Mon Sep 17 00:00:00 2001
From: mormio <morganemoss@gmail.com>
Date: Mon, 4 Nov 2024 14:35:55 -0500
Subject: [PATCH 3/3] update instructions, feedback

---
 froggy/tools/reasoning.py | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/froggy/tools/reasoning.py b/froggy/tools/reasoning.py
index c4b651118..ca018367d 100644
--- a/froggy/tools/reasoning.py
+++ b/froggy/tools/reasoning.py
@@ -11,16 +11,35 @@ def instructions(self):
         assert hasattr(self, "environment")
         instruction = {
             "template": "<reasoning> ... </reasoning>",
-            "description": """You may explicitly reason about the current state and the best next action before taking it. You follow a particular reasoning style: 
-You break down complex problems into smaller parts and reason through them step by step, arriving at the best next action before then executing it. You should follow your reasoning 
-with your next action.""",
-            "examples": [
-                "<reasoning> The execution trace points to line 43 in main.py, so I'll place a breakpoint there.</reasoning> ```pdb b 43",
-                "<reasoning> There's a shape mismatch that corresponds to a matrix transpose, so I'll rewrite the function to account for the transpose. </reasoning> ```rewrite ....",
-            ],
+            "description": self.description,
+            "examples": self.examples,
         }
         return instruction
     
+    @property
+    def examples(self):
+        """Example actions for this tool; a follow-up call is shown only when chaining is enabled."""
+        # Early returns replace the temporary; each access still builds a fresh list.
+        if self.allow_chain_action:
+            return [
+                "<reasoning> The execution trace points to line 43 in main.py, so I'll place a breakpoint there.</reasoning> ```pdb b 43",
+                "<reasoning> There's a shape mismatch that corresponds to a matrix transpose, so I'll rewrite the function to account for the transpose. </reasoning> ```rewrite ....",
+            ]
+        return [
+            "<reasoning> The execution trace points to line 43 in main.py, so I'll place a breakpoint there.</reasoning> ",
+            "<reasoning> There's a shape mismatch that corresponds to a matrix transpose, so I'll rewrite the function to account for the transpose. </reasoning>",
+        ]
+    
+    @property
+    def description(self):
+        """Return the tool description for the active mode.
+
+        The chained wording additionally asks for an action after the reasoning.
+        """
+        if self.allow_chain_action:
+            return "You may explicitly reason about the current state and the best course of action before executing. You follow a particular reasoning style: \nYou break down complex problems into smaller parts and reason through them step by step, arriving at the best next action before then executing it. You should follow your reasoning with your next action. "
+        return "You may explicitly reason about the current state and the best course of action. You follow a particular reasoning style: \nYou break down complex problems into smaller parts and reason through them step by step, arriving at the best next action(s). "
+
     def __init__(self, allow_chain_action: bool = False):
         super().__init__()
         self.allow_chain_action = allow_chain_action
@@ -51,7 +70,12 @@ def use_with_chaining(self, action):
         next_action_obs = self.environment.step(remaining_action)
-        if next_action_obs == f"Invalid action: {action}.":
-            next_action_obs == f"You must provide a valid action after your reasoning. Found invalid action: {action}."
-        return next_action_obs[0]
+        # NOTE(review): assumes step() returns a sequence whose first item is the
+        # observation text (the previous code indexed it with [0]) -- confirm.
+        observation = next_action_obs[0]
+        # The environment was handed the stripped action, so that is the text an
+        # invalid-action message would presumably echo back; assign (not compare).
+        if observation == f"Invalid action: {remaining_action}.":
+            observation = f"You must provide a valid action after your reasoning. Found invalid action: {remaining_action}."
+        return f"Reasoning text acknowledged. {observation}"
     
     def use_without_chaining(self):
         return "Reasoning text acknowledged."