separate commit0-specific stuff from agent stuff

wenting-zhao · wenting-zhao · commit ad23e197efc7 · 2024-09-18T03:49:36.000Z
diff --git a/baselines/class_types.py b/baselines/class_types.py
@@ -11,8 +11,9 @@ class Commit0Config:
 
 
 @dataclass
-class AiderConfig:
-    llm_name: str
+class AgentConfig:
+    agent_name: str
+    model_name: str
     use_user_prompt: bool
     user_prompt: str
     use_repo_info: bool
diff --git a/baselines/commit0_utils.py b/baselines/commit0_utils.py
@@ -4,7 +4,7 @@
 from pathlib import Path
 from typing import Any, Dict, List
 
-from baselines.class_types import AiderConfig
+from baselines.class_types import AgentConfig
 
 PROMPT_HEADER = ">>> Here is the Task:\n"
 REFERENCE_HEADER = "\n\n>>> Here is the Reference for you to finish the task:\n"
@@ -138,51 +138,51 @@ def get_target_edit_files(target_dir: str) -> list[str]:
     return files
 
 
-def get_message_to_aider(
-    aider_config: AiderConfig,
+def get_message(
+    agent_config: AgentConfig,
     target_edit_files_cmd_args: str,
     repo_path: str,
     ds: Dict[str, Any],
 ) -> str:
     """Get the message to Aider."""
-    prompt = f"{PROMPT_HEADER} " + aider_config.user_prompt
+    prompt = f"{PROMPT_HEADER}" + agent_config.user_prompt
 
-    if aider_config.use_unit_tests_info and ds["test"]["test_dir"]:
+    if agent_config.use_unit_tests_info and ds["test"]["test_dir"]:
         unit_tests_info = (
             f"\n{UNIT_TESTS_INFO_HEADER} "
             + get_dir_info(
                 dir_path=Path(os.path.join(repo_path, ds["test"]["test_dir"])),
                 prefix="",
                 include_stubs=True,
-            )[: aider_config.max_unit_tests_info_length]
+            )[: agent_config.max_unit_tests_info_length]
         )
     else:
         unit_tests_info = ""
 
     # TODO: assuming we have specification, which we currently do not have
-    if aider_config.use_reference_info and ds["specification"]:
+    if agent_config.use_reference_info and ds["specification"]:
         reference = (
             f"\n{REFERENCE_HEADER} "
             + get_reference(ds["specification"])[
-                : aider_config.max_reference_info_length
+                : agent_config.max_reference_info_length
             ]
         )
     else:
         reference = ""
 
-    if aider_config.use_repo_info:
+    if agent_config.use_repo_info:
         repo_info = (
             f"\n{REPO_INFO_HEADER} "
             + get_dir_info(
                 dir_path=Path(repo_path), prefix="", max_depth=2, include_stubs=False
-            )[: aider_config.max_repo_info_length]
+            )[: agent_config.max_repo_info_length]
         )
     else:
         repo_info = ""
 
-    message_to_aider = prompt + reference + repo_info + unit_tests_info
+    message_to_agent = prompt + reference + repo_info + unit_tests_info
 
-    return message_to_aider
+    return message_to_agent
 
 
 def get_reference(specification_pdf_path: str) -> str:
diff --git a/baselines/configs/agent.yaml b/baselines/configs/agent.yaml
@@ -6,12 +6,11 @@ defaults:
 commit0_config:
   repo_split: minitorch
 
-aider_config:
+agent_config:
   use_user_prompt: false
   use_repo_info: false
   use_unit_tests_info: false
   use_reference_info: false
   use_lint_info: false
   pre_commit_config_path: .pre-commit-config.yaml
   run_tests: false
-  llm_name: o1-preview
diff --git a/baselines/configs/base.yaml b/baselines/configs/base.yaml
@@ -10,8 +10,9 @@ commit0_config:
   repo_split: "simpy"
   num_workers: 10
 
-aider_config:
-  llm_name: "claude-3-5-sonnet-20240620"
+agent_config:
+  agent_name: "aider"
+  model_name: "claude-3-5-sonnet-20240620"
   use_user_prompt: false
   user_prompt: "Here is your task:\nYou need to implement all functions with 'NotImplementedError('IMPLEMENT ME HERE')' and pass the unit tests.\nDo not change the names of existing functions or classes, as they may be referenced from other code like unit tests, etc.\nWhen you generate code, you must maintain the original formatting of the function stubs (such as whitespaces), otherwise we will not able to search/replace blocks for code modifications, and therefore you will receive a score of 0 for your generated code."
   use_repo_info: false
diff --git a/baselines/run_agent.py b/baselines/run_agent.py
@@ -4,24 +4,21 @@
 import hydra
 from datasets import load_dataset
 import traceback
-from baselines.baseline_utils import (
-    get_message_to_aider,
+from baselines.commit0_utils import (
+    get_message,
     get_target_edit_files,
 )
+from baselines.agents import AiderAgents
 from typing import Optional, Type
 from types import TracebackType
 from hydra.core.config_store import ConfigStore
-from baselines.class_types import AiderConfig, Commit0Config
+from baselines.class_types import AgentConfig, Commit0Config
 from commit0.harness.constants import SPLIT
 from commit0.harness.get_pytest_ids import main as get_tests
 from tqdm import tqdm
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from commit0.harness.constants import RUN_AIDER_LOG_DIR
 
-from aider.coders import Coder
-from aider.models import Model
-from aider.io import InputOutput
-
 
 class DirContext:
     def __init__(self, d):
@@ -38,33 +35,9 @@ def __exit__(
         os.chdir(self.cwd)
 
 
-def run_aider(
-    model_name: str,
-    fnames: list[str],
-    message: str,
-    test_cmd: str,
-    lint_cmd: str,
-    log_dir: Path,
-) -> None:
-    if test_cmd:
-        auto_test = True
-    else:
-        auto_test = False
-    if lint_cmd:
-        auto_lint = True
-    else:
-        auto_lint = False
-    model = Model(model_name)
-    input_history_file = log_dir / ".aider.input.history"
-    chat_history_file = log_dir / ".aider.chat.history.md"
-    io = InputOutput(yes=True, input_history_file=input_history_file, chat_history_file=chat_history_file)
-    coder = Coder.create(main_model=model, fnames=fnames, auto_lint=auto_lint, lint_cmds=lint_cmd, io=io)
-    coder.run(message)
-
-
-def run_aider_for_repo(
+def run_agent_for_repo(
     commit0_config: Commit0Config | None,
-    aider_config: AiderConfig | None,
+    agent_config: AgentConfig | None,
     ds: dict,
 ) -> None:
     """Run Aider for a given repository."""
@@ -83,59 +56,42 @@ def run_aider_for_repo(
 
     target_edit_files = get_target_edit_files(repo_path)
 
+    if agent_config.agent_name == "aider":
+        agent = AiderAgents(agent_config.model_name)
+    else:
+        raise NotImplementedError(f"{agent_config.agent_name} is not implemented; please add your implementations in baselines/agents.py.")
+
     with DirContext(repo_path):
-        if commit0_config is None or aider_config is None:
+        if commit0_config is None or agent_config is None:
             raise ValueError("Invalid input")
 
-        message_to_aider = get_message_to_aider(
-            aider_config, target_edit_files, repo_path, ds
+        message = get_message(
+            agent_config, target_edit_files, repo_path, ds
         )
 
-        if aider_config.use_lint_info:
+        if agent_config.use_lint_info:
             lint_cmd = "pre-commit run --config ../../.pre-commit-config.yaml --files"
         else:
             lint_cmd = ""
 
-        if aider_config.run_tests:
+        if agent_config.run_tests:
+            # when unit test feedback is available, iterate over test files
             for test_file in test_files:
                 test_cmd = f"python -m commit0 test {repo_path} {test_file}"
-                # set up logging
                 test_file_name = test_file.replace(".py", "").replace("/", "__")
                 log_dir = RUN_AIDER_LOG_DIR / "with_tests" / test_file_name
-                log_dir.mkdir(parents=True, exist_ok=True)
-                log_file = log_dir / "run_aider.log"
-
-                aider_cmd = run_aider(
-                    aider_config.llm_name,
-                    target_edit_files,
-                    message_to_aider,
-                    test_cmd,
-                    lint_cmd,
-                    log_dir,
-                )
 
-                # write aider command to log file
-                aider_cmd_file = Path(log_dir / "aider_cmd.sh")
-                aider_cmd_file.write_text(aider_cmd)
-
-                # write test command to log file
-                test_cmd_file = Path(log_dir / "test_cmd.sh")
-                test_cmd_file.write_text(test_cmd)
+                agent.run(
+                    message, test_cmd, lint_cmd, target_edit_files, log_dir,
+                )
         else:
-            test_cmd = ""
+            # when unit test feedback is not available, iterate over target files to edit
             for f in target_edit_files:
                 file_name = f.replace(".py", "").replace("/", "__")
                 log_dir = RUN_AIDER_LOG_DIR / "no_tests" / file_name
-                log_dir.mkdir(parents=True, exist_ok=True)
-                log_file = log_dir / "run_aider.log"
-
-                aider_cmd = run_aider(
-                    aider_config.llm_name,
-                    [f],
-                    message_to_aider,
-                    test_cmd,
-                    lint_cmd,
-                    log_dir,
+
+                agent.run(
+                    message, "", lint_cmd, [f], log_dir
                 )
 
 
@@ -146,15 +102,15 @@ def main() -> None:
     """
     cs = ConfigStore.instance()
     cs.store(name="user", node=Commit0Config)
-    cs.store(name="user", node=AiderConfig)
+    cs.store(name="user", node=AgentConfig)
 
     hydra.initialize(version_base=None, config_path="configs")
-    config = hydra.compose(config_name="aider")
+    config = hydra.compose(config_name="agent")
 
     commit0_config = Commit0Config(**config.commit0_config)
-    aider_config = AiderConfig(**config.aider_config)
+    agent_config = AgentConfig(**config.agent_config)
 
-    if commit0_config is None or aider_config is None:
+    if commit0_config is None or agent_config is None:
         raise ValueError("Invalid input")
 
     dataset = load_dataset(
@@ -173,6 +129,7 @@ def main() -> None:
             in SPLIT.get(commit0_config.repo_split, [])
         )
     ]
+    assert len(filtered_dataset) > 0, "No examples available"
 
     with tqdm(
         total=len(filtered_dataset), smoothing=0, desc="Running Aider for repos"
@@ -181,10 +138,10 @@ def main() -> None:
             # Create a future for running Aider for each repo
             futures = {
                 executor.submit(
-                    run_aider_for_repo,
+                    run_agent_for_repo,
                     commit0_config,
-                    aider_config,
-                    example if isinstance(example, dict) else {},
+                    agent_config,
+                    example
                 ): example
                 for example in filtered_dataset
             }
diff --git a/commit0/harness/run_pytest_ids.py b/commit0/harness/run_pytest_ids.py
@@ -52,6 +52,8 @@ def main(
     repo_name = None
     for example in dataset:
         repo_name = example["repo"].split("/")[-1]
+        if repo_or_repo_dir.endswith("/"):
+            repo_or_repo_dir = repo_or_repo_dir[:-1]
         if repo_name in os.path.basename(repo_or_repo_dir):
             spec = make_spec(example)
             break