diff --git a/python/composio/tools/env/base.py b/python/composio/tools/env/base.py index b4776f66a1..0125bf95bf 100644 --- a/python/composio/tools/env/base.py +++ b/python/composio/tools/env/base.py @@ -81,7 +81,7 @@ def new(self) -> Shell: def get(self, id: t.Optional[str] = None) -> Shell: """Get shell instance.""" - if id is None: + if id is None or id == "": return self.recent if id not in self._shells: raise ComposioSDKError( diff --git a/python/composio/tools/env/docker/workspace.py b/python/composio/tools/env/docker/workspace.py index 4c578f2368..367de2dd92 100644 --- a/python/composio/tools/env/docker/workspace.py +++ b/python/composio/tools/env/docker/workspace.py @@ -4,6 +4,7 @@ import os import typing as t +from composio.utils.logging import get as get_logger from docker import DockerClient, from_env from docker.errors import DockerException @@ -27,7 +28,10 @@ class DockerWorkspace(Workspace): def __init__(self, image: t.Optional[str] = None) -> None: """Create a docker workspace.""" self.id = generate_id() + logger = get_logger(name="docker_workspace") + logger.info(f"Creating docker workspace with image: {image}") self._image = image or os.environ.get("COMPOSIO_SWE_AGENT", DEFAULT_IMAGE) + logger.info(f"Using image: {self._image}") self._container = self.client.containers.run( image=self._image, command="/bin/bash -l -m", diff --git a/python/composio/tools/env/factory.py b/python/composio/tools/env/factory.py index 15ce4fb529..93a23489e6 100644 --- a/python/composio/tools/env/factory.py +++ b/python/composio/tools/env/factory.py @@ -61,6 +61,8 @@ def new(cls, env: ExecEnv, **kwargs: t.Any) -> Workspace: if env == ExecEnv.HOST: workspace = HostWorkspace(**kwargs) elif env == ExecEnv.DOCKER: + logger = get_logger(name="workspace_factory_new") + logger.info(f"Creating docker workspace with kwargs: {kwargs}") workspace = DockerWorkspace(**kwargs) else: raise ComposioSDKError( diff --git a/python/composio/tools/local/shelltool/git_cmds/actions/git_tree.py 
b/python/composio/tools/local/shelltool/git_cmds/actions/git_tree.py index fa6d1876cf..a3aba5df18 100644 --- a/python/composio/tools/local/shelltool/git_cmds/actions/git_tree.py +++ b/python/composio/tools/local/shelltool/git_cmds/actions/git_tree.py @@ -33,4 +33,7 @@ def execute( authorisation_data=authorisation_data, shell_id=request_data.shell_id, ) - return ShellExecResponse(stdout=output["stdout"], stderr=output["stderr"]) + return ShellExecResponse( + stdout="Check git_repo_tree.txt for the git-repo-tree results. Use Open File function to check the file.", + stderr=output["stderr"], + ) diff --git a/python/composio/tools/local/shelltool/tests/test_workspace.py b/python/composio/tools/local/shelltool/tests/test_workspace.py index 1e80df9883..3ea2b256c7 100644 --- a/python/composio/tools/local/shelltool/tests/test_workspace.py +++ b/python/composio/tools/local/shelltool/tests/test_workspace.py @@ -110,7 +110,7 @@ def test_git_workflow(self): {}, ) self.assertIsNotNone(get_patch_result) - self.assertIsInstance(get_patch_result, tuple) + self.assertIsInstance(get_patch_result, BaseResponse) self.assertIsInstance(tuple(get_patch_result)[0], tuple) patch_content = ( tuple(tuple(get_patch_result)[0])[1] diff --git a/python/swe/benchmark/get_score_card.py b/python/swe/benchmark/get_score_card.py index 4a4f6636f3..66c0785c47 100644 --- a/python/swe/benchmark/get_score_card.py +++ b/python/swe/benchmark/get_score_card.py @@ -81,7 +81,7 @@ def save_summaries_to_file(predictions_dir, predictions_path, log_dir, scorecard logging.info("- Wrote summary of run to: %s", results_path) -def main(predictions_dir, log_dir, swe_bench_path, model): +def generate_scorecard(predictions_dir, log_dir, swe_bench_path, model): logging.info("Starting main function") eval_refs, _ = get_cur_eval_refs(predictions_dir, swe_bench_path) predictions_path = predictions_dir / Path(PATH_PATCHES_JSON) @@ -201,7 +201,7 @@ def main(predictions_dir, log_dir, swe_bench_path, model): testbed_dir = 
prediction_path_dir / Path(PATH_TESTBED) if not os.path.exists(testbed_dir): os.makedirs(testbed_dir) - main( + generate_scorecard( predictions_dir=prediction_path_dir, log_dir=str(args.log_dir), swe_bench_path=args.swe_bench_path, diff --git a/python/swe/benchmark/run_evaluation.py b/python/swe/benchmark/run_evaluation.py index c367f0ba42..1c7fdf9dff 100644 --- a/python/swe/benchmark/run_evaluation.py +++ b/python/swe/benchmark/run_evaluation.py @@ -1,22 +1,36 @@ # pylint: disable=logging-fstring-interpolation + import argparse +import asyncio import datetime import logging +import os +from pathlib import Path +from benchmark.constants import MODEL_GPT4 +from benchmark.get_score_card import generate_scorecard +from benchmark.setup_test_bed import create_patches_file from composio_crewai import ComposioToolSet -from composio_swe.config.constants import KEY_API_KEY -from composio_swe.config.context import Context, set_context +from composio_swe.config.constants import ( + KEY_API_KEY, + LOCAL_CACHE_DIRECTORY_NAME, + LOGS_DIR, +) +from composio_swe.config.context import Context, get_context, set_context from composio_swe.config.store import IssueConfig from datasets import load_dataset from rich.logging import RichHandler from composio import Action, Composio from composio.tools.env.factory import ExecEnv, WorkspaceFactory -from examples.crewai_agent import CrewaiAgent, SWEArgs +from swe.examples.crewai_agent import CrewaiAgent, SWEArgs +from swe.swe_bench_docker.evaulate_on_docker import EvaluateOnDockerArgs, evaluate # get logger LOGGER_NAME = "local_workspace" +DATASET_NAME = "princeton-nlp/SWE-bench_Lite" +PATH_TESTBED = "testbed/" handler = RichHandler(show_time=False, show_path=False) handler.setLevel(logging.DEBUG) @@ -26,36 +40,41 @@ logger.propagate = False -# princeton swe bench lite dataset has these fields -# instance_id: (str) - A formatted instance identifier, usually as repo_owner__repo_name-PR-number. 
-# patch: (str) - The gold patch, the patch generated by the PR (minus test-related code), that resolved the issue. -# repo: (str) - The repository owner/name identifier from GitHub. -# base_commit: (str) - The commit hash of the repository representing the HEAD of the repository before the solution PR is applied. -# hints_text: (str) - Comments made on the issue prior to the creation of the solution PR's first commit creation date. -# created_at: (str) - The creation date of the pull request. -# test_patch: (str) - A test-file patch that was contributed by the solution PR. -# problem_statement: (str) - The issue title and body. -# version: (str) - Installation version to use for running evaluation. -# environment_setup_commit: (str) - commit hash to use for environment setup and installation. -# FAIL_TO_PASS: (str) - A json list of strings that represent the set of tests resolved by the PR and tied to the issue resolution. -# PASS_TO_PASS: (str) - A json list of strings that represent tests that should pass before and after the PR application. 
- - -def filter_from_repo_name(curr_dataset, repo_name): - filtered_dataset = curr_dataset.filter( - lambda x: x["repo"] == repo_name.strip().lower() - ) - return filtered_dataset - - def get_issues_dataset(test_split): test_dataset = load_dataset( - "princeton-nlp/SWE-bench_Lite", + DATASET_NAME, split=f"test[{test_split}]", ) return test_dataset +def get_score(logs_dir=None): + ctx = get_context() + if logs_dir is None: + logs_dir = ctx.agent_logs_dir + prediction_patches_path = create_patches_file(logs_dir, DATASET_NAME) + print("logs dir: ", logs_dir) + print("prediction_patches_path: ", prediction_patches_path) + evaluate_args = EvaluateOnDockerArgs( + predictions_path=str(prediction_patches_path), + # docker_dir="./docker", + swe_bench_tasks=DATASET_NAME, + namespace="aorwall", + log_dir=str(logs_dir), + ) + asyncio.run(evaluate(**evaluate_args.model_dump())) + prediction_path_dir = Path(prediction_patches_path).parent + testbed_dir = prediction_path_dir / Path(PATH_TESTBED) + if not os.path.exists(testbed_dir): + os.makedirs(testbed_dir) + generate_scorecard( + predictions_dir=prediction_path_dir, + log_dir=str(logs_dir), + swe_bench_path=f"{logs_dir}/dataset", + model=MODEL_GPT4, + ) + + def build_issue_description(hints, problem_statement, include_hints): if not problem_statement or not problem_statement.strip(): raise ValueError("problem statement is empty") @@ -98,13 +117,21 @@ def create_workspace_from_image(repo, repo_to_image_id_map, base_commit): workspace_id = workspace.id workspace_creation_time = datetime.datetime.now() - start_time composio_toolset = ComposioToolSet(workspace_id=workspace_id) + cd_resp = composio_toolset.execute_action( + action=Action.SHELL_EXECUTE_COMMAND, + params={ + "cmd": f"cd /{repo.split('/')[-1]}", + }, + ) + if isinstance(cd_resp, dict) and cd_resp.get("status") == "failure": + raise Exception(f"Error changing directory: {cd_resp['details']}") logger.info( "workspace is created, workspace-id is: %s, creation time: %s", 
workspace_id, workspace_creation_time, ) logger.info("Resetting repository to base commit") - composio_toolset.execute_action( + reset_resp = composio_toolset.execute_action( action=Action.GITCMDTOOL_GITHUB_CLONE_CMD, params={ "repo_name": repo, @@ -112,6 +139,8 @@ def create_workspace_from_image(repo, repo_to_image_id_map, base_commit): "commit_id": base_commit, }, ) + if isinstance(reset_resp, dict) and reset_resp.get("status") == "failure": + raise Exception(f"Error resetting repository: {reset_resp['details']}") return workspace_id @@ -167,22 +196,23 @@ def setup_workspace(repo, repo_to_workspace_map, repo_to_image_id_map, base_comm ) -def run(test_split, print_only=False, include_hints=True): +def run(test_split, print_only=False, include_hints=True, logs_dir=None): """ Main function to load and display entries from the SWE-bench lite dataset. """ issues = get_issues_dataset(test_split) - repo_to_workspace_map = {} - repo_to_image_id_map = {""} + repo_to_image_id_map = { + "django/django": "techcomposio/swe-bench-django_django", + "astropy/astropy": "kaavee315/astropy_astropy", + } for count, issue in enumerate(issues, 1): try: repo = issue["repo"] print(f"Processing {count}th issue with repoMap: {repo_to_workspace_map}") print(f"Repo: {repo}") print(f"Issue id: {issue['instance_id']}") - print(f"Issue description: {issue['problem_statement']}") if print_only: if include_hints: @@ -197,7 +227,10 @@ def run(test_split, print_only=False, include_hints=True): issue_description = build_issue_description( issue["hints_text"], issue["problem_statement"], include_hints ) - print(f"Issue description: {issue_description}") + print("Issue description (first 10 lines):") + for line in issue_description.split("\n")[:10]: + print(line) + print("...") patch = issue["patch"] install_commit_id = issue["environment_setup_commit"] logger.info( @@ -227,7 +260,7 @@ def run(test_split, print_only=False, include_hints=True): ctx.model_env = model_env_config set_context(ctx) - args 
= SWEArgs(agent_logs_dir=ctx.agent_logs_dir) + args = SWEArgs(agent_logs_dir=logs_dir or ctx.agent_logs_dir) coder = CrewaiAgent(args=args, workspace_id=workspace_id) coder.setup_and_solve( issue_config=ctx.issue_config, workspace_id=workspace_id @@ -245,7 +278,7 @@ def run(test_split, print_only=False, include_hints=True): parser.add_argument( "--test_split", type=str, - default="1:10", + default="20:40", help="Test split range (e.g., 1:10)", ) parser.add_argument( @@ -258,7 +291,27 @@ def run(test_split, print_only=False, include_hints=True): action="store_true", help="Include hints in the issue description", ) + parser.add_argument( + "--gen_report", + action="store_true", + default=False, + help="Generate a report after running evaluations", + ) + parser.add_argument( + "--logs_dir", + type=str, + default=f"{Path.home()}/{LOCAL_CACHE_DIRECTORY_NAME}/{LOGS_DIR}/{int(datetime.datetime.now().timestamp())}", + help="Logs directory", + ) + args = parser.parse_args() - print("Starting evaluation") - run(args.test_split, args.print_only, args.include_hints) + # Make the log directory if it doesn't exist + logs_dir = Path(args.logs_dir) + if not logs_dir.exists(): + logs_dir.mkdir(parents=True) + + print("Starting evaluation with gen_report: ", args.gen_report) + run(args.test_split, args.print_only, args.include_hints, args.logs_dir) + if args.gen_report: + get_score(args.logs_dir) diff --git a/python/swe/benchmark/setup_test_bed.py b/python/swe/benchmark/setup_test_bed.py index 085f08ba5a..166a0fad60 100644 --- a/python/swe/benchmark/setup_test_bed.py +++ b/python/swe/benchmark/setup_test_bed.py @@ -62,7 +62,7 @@ def log_file(f_name): return False -def main(predictions_dir, dataset_path_or_name): +def create_patches_file(predictions_dir, dataset_path_or_name): all_patches = [] pred_total, pred_will_eval = 0, 0 download_and_store_dataset( @@ -109,6 +109,7 @@ def main(predictions_dir, dataset_path_or_name): print( f"Found {pred_total} total predictions, will evaluate 
{pred_will_eval} ({pred_total-pred_will_eval} are empty)" ) + return pred_path_orig if __name__ == "__main__": @@ -132,7 +133,7 @@ def main(predictions_dir, dataset_path_or_name): script_path = Path(__file__) script_dir = script_path.parent prediction_path_dir = Path(args.prediction_path_dir) - main( + create_patches_file( predictions_dir=prediction_path_dir, dataset_path_or_name=args.dataset_path_or_name, ) diff --git a/python/swe/composio_swe/agents/base.py b/python/swe/composio_swe/agents/base.py index a9377adb43..5e5afe3174 100644 --- a/python/swe/composio_swe/agents/base.py +++ b/python/swe/composio_swe/agents/base.py @@ -64,6 +64,7 @@ def save(self, instance_id: str) -> None: """Save current history state.""" self.agent_logs[instance_id] = self.current_logs with open(self.task_output_logs, "w", encoding="utf-8") as f: + self.logger.info(f"Saving logs to {self.task_output_logs}") f.write(json.dumps(self.agent_logs)) def setup_and_solve( diff --git a/python/swe/examples/crewai_agent.py b/python/swe/examples/crewai_agent.py index 5ecdc39c42..8011b838b5 100644 --- a/python/swe/examples/crewai_agent.py +++ b/python/swe/examples/crewai_agent.py @@ -24,7 +24,7 @@ def __init__(self, args: SWEArgs, workspace_id: str) -> None: apps=[ App.SEARCHTOOL, App.GITCMDTOOL, - App.FILETOOL, + App.FILEEDITTOOL, App.HISTORYFETCHERTOOL, ] ) diff --git a/python/swe/swe_bench_docker/THIRD-PARTY-LICENSE b/python/swe/swe_bench_docker/THIRD-PARTY-LICENSE new file mode 100644 index 0000000000..2e0623f067 --- /dev/null +++ b/python/swe/swe_bench_docker/THIRD-PARTY-LICENSE @@ -0,0 +1,23 @@ +For the third-party code(python/swe/swe_bench_docker) used in this project, we have included the following licenses: + +MIT License + +Copyright (c) 2024 Albert Örwall + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights 
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/python/swe/swe_bench_docker/__init__.py b/python/swe/swe_bench_docker/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/swe/swe_bench_docker/docker_file_generator/Makefile b/python/swe/swe_bench_docker/docker_file_generator/Makefile new file mode 100644 index 0000000000..55e1814c21 --- /dev/null +++ b/python/swe/swe_bench_docker/docker_file_generator/Makefile @@ -0,0 +1,345 @@ +all: + docker build -t techcomposio/swe-bench-conda:bookworm-slim -f docker/Dockerfile . + docker build -t techcomposio/swe-bench-pyenv:bookworm-slim -f docker/pyenv/Dockerfile . + docker build -t techcomposio/swe-bench-pyenvs:bookworm-slim -f docker/pyenv/Dockerfile-pyenvs . + docker build -t techcomposio/swe-bench-sqlfluff_sqlfluff:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sqlfluff__sqlfluff/Dockerfile . + docker build -t techcomposio/swe-bench-sqlfluff_sqlfluff-testbed:0.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sqlfluff__sqlfluff/0.6/Dockerfile . 
+ docker build -t techcomposio/swe-bench-sqlfluff_sqlfluff-testbed:0.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sqlfluff__sqlfluff/0.6/Dockerfile . + docker build -t techcomposio/swe-bench-sqlfluff_sqlfluff-testbed:0.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sqlfluff__sqlfluff/0.6/Dockerfile . + docker build -t techcomposio/swe-bench-sqlfluff_sqlfluff-testbed:0.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sqlfluff__sqlfluff/0.6/Dockerfile . + docker build -t techcomposio/swe-bench-sqlfluff_sqlfluff-testbed:0.8 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sqlfluff__sqlfluff/0.8/Dockerfile . + docker build -t techcomposio/swe-bench-marshmallow-code_marshmallow:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//marshmallow-code__marshmallow/Dockerfile . + docker build -t techcomposio/swe-bench-marshmallow-code_marshmallow-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//marshmallow-code__marshmallow/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-marshmallow-code_marshmallow-testbed:2.20 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//marshmallow-code__marshmallow/2.20/Dockerfile . + docker build -t techcomposio/swe-bench-pvlib_pvlib-python:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pvlib__pvlib-python/Dockerfile . + docker build -t techcomposio/swe-bench-pvlib_pvlib-python-testbed:0.9 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pvlib__pvlib-python/0.9/Dockerfile . + docker build -t techcomposio/swe-bench-pvlib_pvlib-python-testbed:0.9 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pvlib__pvlib-python/0.9/Dockerfile . 
+ docker build -t techcomposio/swe-bench-pvlib_pvlib-python-testbed:0.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pvlib__pvlib-python/0.7/Dockerfile . + docker build -t techcomposio/swe-bench-pvlib_pvlib-python-testbed:0.8 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pvlib__pvlib-python/0.8/Dockerfile . + docker build -t techcomposio/swe-bench-pvlib_pvlib-python-testbed:0.8 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pvlib__pvlib-python/0.8/Dockerfile . + docker build -t techcomposio/swe-bench-pylint-dev_astroid:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pylint-dev__astroid/Dockerfile . + docker build -t techcomposio/swe-bench-pylint-dev_astroid-testbed:2.14 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pylint-dev__astroid/2.14/Dockerfile . + docker build -t techcomposio/swe-bench-pylint-dev_astroid-testbed:2.10 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pylint-dev__astroid/2.10/Dockerfile . + docker build -t techcomposio/swe-bench-pylint-dev_astroid-testbed:2.12 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pylint-dev__astroid/2.12/Dockerfile . + docker build -t techcomposio/swe-bench-pylint-dev_astroid-testbed:2.13 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pylint-dev__astroid/2.13/Dockerfile . + docker build -t techcomposio/swe-bench-pylint-dev_astroid-testbed:2.9 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pylint-dev__astroid/2.9/Dockerfile . + docker build -t techcomposio/swe-bench-pyvista_pyvista:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pyvista__pyvista/Dockerfile . 
+ docker build -t techcomposio/swe-bench-pyvista_pyvista-testbed:0.39 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pyvista__pyvista/0.39/Dockerfile . + docker build -t techcomposio/swe-bench-pydicom_pydicom:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pydicom__pydicom/Dockerfile . + docker build -t techcomposio/swe-bench-pydicom_pydicom-testbed:2.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pydicom__pydicom/2.3/Dockerfile . + docker build -t techcomposio/swe-bench-pydicom_pydicom-testbed:2.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pydicom__pydicom/2.1/Dockerfile . + docker build -t techcomposio/swe-bench-pydicom_pydicom-testbed:2.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pydicom__pydicom/2.1/Dockerfile . + docker build -t techcomposio/swe-bench-pydicom_pydicom-testbed:1.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pydicom__pydicom/1.3/Dockerfile . + docker build -t techcomposio/swe-bench-pydicom_pydicom-testbed:2.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pydicom__pydicom/2.0/Dockerfile . + docker build -t techcomposio/swe-bench-astropy_astropy:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//astropy__astropy/Dockerfile . + docker build -t techcomposio/swe-bench-astropy_astropy-testbed:4.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//astropy__astropy/4.3/Dockerfile . + docker build -t techcomposio/swe-bench-astropy_astropy-testbed:5.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//astropy__astropy/5.1/Dockerfile . 
+ docker build -t techcomposio/swe-bench-astropy_astropy-testbed:5.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//astropy__astropy/5.1/Dockerfile . + docker build -t techcomposio/swe-bench-astropy_astropy-testbed:5.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//astropy__astropy/5.2/Dockerfile . + docker build -t techcomposio/swe-bench-astropy_astropy-testbed:1.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//astropy__astropy/1.3/Dockerfile . + docker build -t techcomposio/swe-bench-astropy_astropy-testbed:1.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//astropy__astropy/1.3/Dockerfile . + docker build -t techcomposio/swe-bench-django_django:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . 
+ docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . 
+ docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . 
+ docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . 
+ docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . 
+ docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . 
+ docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . 
+ docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.1/Dockerfile . 
+ docker build -t techcomposio/swe-bench-django_django-testbed:4.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.1/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . 
+ docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:4.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/4.2/Dockerfile . 
+ docker build -t techcomposio/swe-bench-django_django-testbed:5.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/5.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:5.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/5.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:5.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/5.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:5.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/5.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:5.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/5.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:5.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/5.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:5.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/5.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:5.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/5.0/Dockerfile . + docker build -t techcomposio/swe-bench-django_django-testbed:5.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//django__django/5.0/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.3/Dockerfile . 
+ docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.5/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.5/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.5/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.5/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.5/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.5/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.5/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.6/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.6/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.6/Dockerfile . 
+ docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.6/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.6/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.6/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.6/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.6/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.7/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.7/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.7/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.7/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.7/Dockerfile . 
+ docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.7/Dockerfile . + docker build -t techcomposio/swe-bench-matplotlib_matplotlib-testbed:3.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//matplotlib__matplotlib/3.7/Dockerfile . + docker build -t techcomposio/swe-bench-mwaskom_seaborn:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//mwaskom__seaborn/Dockerfile . + docker build -t techcomposio/swe-bench-mwaskom_seaborn-testbed:0.12 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//mwaskom__seaborn/0.12/Dockerfile . + docker build -t techcomposio/swe-bench-mwaskom_seaborn-testbed:0.12 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//mwaskom__seaborn/0.12/Dockerfile . + docker build -t techcomposio/swe-bench-mwaskom_seaborn-testbed:0.12 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//mwaskom__seaborn/0.12/Dockerfile . + docker build -t techcomposio/swe-bench-mwaskom_seaborn-testbed:0.13 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//mwaskom__seaborn/0.13/Dockerfile . + docker build -t techcomposio/swe-bench-pallets_flask:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pallets__flask/Dockerfile . + docker build -t techcomposio/swe-bench-pallets_flask-testbed:2.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pallets__flask/2.0/Dockerfile . + docker build -t techcomposio/swe-bench-pallets_flask-testbed:2.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pallets__flask/2.3/Dockerfile . 
+ docker build -t techcomposio/swe-bench-pallets_flask-testbed:2.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pallets__flask/2.3/Dockerfile . + docker build -t techcomposio/swe-bench-psf_requests:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//psf__requests/Dockerfile . + docker build -t techcomposio/swe-bench-psf_requests-testbed:2.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//psf__requests/2.3/Dockerfile . + docker build -t techcomposio/swe-bench-psf_requests-testbed:2.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//psf__requests/2.3/Dockerfile . + docker build -t techcomposio/swe-bench-psf_requests-testbed:2.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//psf__requests/2.4/Dockerfile . + docker build -t techcomposio/swe-bench-psf_requests-testbed:2.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//psf__requests/2.7/Dockerfile . + docker build -t techcomposio/swe-bench-psf_requests-testbed:2.10 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//psf__requests/2.10/Dockerfile . + docker build -t techcomposio/swe-bench-psf_requests-testbed:0.14 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//psf__requests/0.14/Dockerfile . + docker build -t techcomposio/swe-bench-pydata_xarray:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pydata__xarray/Dockerfile . + docker build -t techcomposio/swe-bench-pydata_xarray-testbed:0.12 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pydata__xarray/0.12/Dockerfile . + docker build -t techcomposio/swe-bench-pydata_xarray-testbed:0.12 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pydata__xarray/0.12/Dockerfile . 
+ docker build -t techcomposio/swe-bench-pydata_xarray-testbed:0.12 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pydata__xarray/0.12/Dockerfile . + docker build -t techcomposio/swe-bench-pydata_xarray-testbed:0.12 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pydata__xarray/0.12/Dockerfile . + docker build -t techcomposio/swe-bench-pydata_xarray-testbed:0.12 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pydata__xarray/0.12/Dockerfile . + docker build -t techcomposio/swe-bench-pylint-dev_pylint:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pylint-dev__pylint/Dockerfile . + docker build -t techcomposio/swe-bench-pylint-dev_pylint-testbed:2.13 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pylint-dev__pylint/2.13/Dockerfile . + docker build -t techcomposio/swe-bench-pylint-dev_pylint-testbed:2.14 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pylint-dev__pylint/2.14/Dockerfile . + docker build -t techcomposio/swe-bench-pylint-dev_pylint-testbed:2.15 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pylint-dev__pylint/2.15/Dockerfile . + docker build -t techcomposio/swe-bench-pylint-dev_pylint-testbed:2.15 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pylint-dev__pylint/2.15/Dockerfile . + docker build -t techcomposio/swe-bench-pylint-dev_pylint-testbed:2.15 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pylint-dev__pylint/2.15/Dockerfile . + docker build -t techcomposio/swe-bench-pylint-dev_pylint-testbed:2.15 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pylint-dev__pylint/2.15/Dockerfile . 
+ docker build -t techcomposio/swe-bench-pytest-dev_pytest:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:8.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/8.0/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:8.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/8.0/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:4.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/4.5/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:4.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/4.4/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:4.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/4.4/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:4.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/4.6/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:4.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/4.6/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:5.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/5.0/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:5.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/5.2/Dockerfile . 
+ docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:5.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/5.4/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:5.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/5.4/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:5.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/5.4/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:5.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/5.4/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:6.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/6.0/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:6.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/6.3/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:7.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/7.0/Dockerfile . + docker build -t techcomposio/swe-bench-pytest-dev_pytest-testbed:7.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//pytest-dev__pytest/7.0/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.20 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.20/Dockerfile . 
+ docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.20 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.20/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.20 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.20/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.20 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.20/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.20 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.20/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.21 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.21/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.21 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.21/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.21 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.21/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.21 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.21/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.21 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.21/Dockerfile . 
+ docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.21 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.21/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.21 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.21/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.22 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.22/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.22 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.22/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.22 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.22/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.22 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.22/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.22 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.22/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.22 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.22/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:0.22 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/0.22/Dockerfile . 
+ docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:1.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/1.3/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:1.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/1.3/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:1.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/1.3/Dockerfile . + docker build -t techcomposio/swe-bench-scikit-learn_scikit-learn-testbed:1.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//scikit-learn__scikit-learn/1.3/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:5.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/5.0/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:5.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/5.1/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:7.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/7.1/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/3.1/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:3.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/3.1/Dockerfile . 
+ docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:3.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/3.2/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:3.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/3.3/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:3.3 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/3.3/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:3.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/3.4/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:3.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/3.4/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:3.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/3.4/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:3.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/3.5/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:3.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/3.5/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:3.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/3.5/Dockerfile . + docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:3.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/3.5/Dockerfile . 
+ docker build -t techcomposio/swe-bench-sphinx-doc_sphinx-testbed:4.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sphinx-doc__sphinx/4.0/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy:bookworm-slim -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.0/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.0/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.0/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.0/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.0/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.0/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.0 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.0/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . 
+ docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . 
+ docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.1 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.1/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.2 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.2/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.4/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.4/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.4/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.4/Dockerfile . 
+ docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.4/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.4/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.4 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.4/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.5/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.5/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.5/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.5/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.5/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.5/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.5 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.5/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.6/Dockerfile . 
+ docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.6/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.6/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.6/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.6/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.6/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.6/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.6/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.6 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.6/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.7/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.7/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.7/Dockerfile . 
+ docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.7/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.7/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.7 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.7/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.8 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.8/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.8 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.8/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.8 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.8/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.8 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.8/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.8 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.8/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.9 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.9/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.9 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.9/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.9 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.9/Dockerfile . 
+ docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.9 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.9/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.9 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.9/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.9 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.9/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.10 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.10/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.10 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.10/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.11 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.11/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.11 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.11/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.11 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.11/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.12 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.12/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.12 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.12/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.12 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.12/Dockerfile . 
+ docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.12 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.12/Dockerfile . + docker build -t techcomposio/swe-bench-sympy_sympy-testbed:1.13 -f /home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker//sympy__sympy/1.13/Dockerfile . diff --git a/python/swe/swe_bench_docker/docker_file_generator/const.py b/python/swe/swe_bench_docker/docker_file_generator/const.py new file mode 100644 index 0000000000..9ce9e4fb67 --- /dev/null +++ b/python/swe/swe_bench_docker/docker_file_generator/const.py @@ -0,0 +1,877 @@ +from enum import Enum + + +PYTHON_ENVIRONMENT_VERSIONS = { + "3.5": "3.5.10", + "3.6": "3.6.15", + "3.7": "3.7.17", + "3.8": "3.8.19", + "3.9": "3.9.19", + "3.10": "3.10.14", + "3.11": "3.11.9", +} + +PYENV_REPOS = { + "astropy/astropy", + "django/django", + "psf/requests", + "scikit-learn/scikit-learn", +} + +MAP_VERSION_TO_INSTALL_SKLEARN = { + k: { + "instance_image": True, + "python": "3.6", + "packages": "numpy==1.19.2 scipy==1.5.2 cython==0.29.7 pytest==4.5.0 pandas matplotlib==3.1.0 joblib threadpoolctl", + "install": "pip install -v --no-build-isolation -e .", + "arch_specific_packages": { + "aarch64": "gxx_linux-aarch64 gcc_linux-aarch64 make", + }, + } + for k in ["0.20", "0.21", "0.22"] +} +MAP_VERSION_TO_INSTALL_SKLEARN.update( + { + k: { + "instance_image": True, + "python": "3.9", + "pre_install": ["pip install setuptools wheel"], + "packages": "numpy scipy cython pytest pandas matplotlib joblib threadpoolctl", + "install": "pip install -v --no-use-pep517 --no-build-isolation -e .", + "arch_specific_packages": { + "aarch64": "gxx_linux-aarch64 gcc_linux-aarch64 make", + }, + } + for k in ["1.3", "1.4"] + } +) + +MAP_VERSION_TO_INSTALL_FLASK = { + "2.0": { + "python": "3.9", + "packages": "requirements.txt", + "install": "pip install -e .", + "pip_packages": [ + "Werkzeug==2.3.7", + "Jinja2==3.0.1", + "itsdangerous==2.1.2", + 
"click==8.0.1", + "MarkupSafe==2.1.3", + ], + }, + "2.1": { + "python": "3.10", + "packages": "requirements.txt", + "install": "pip install -e .", + "pip_packages": [ + "click==8.1.3", + "itsdangerous==2.1.2", + "Jinja2==3.1.2", + "MarkupSafe==2.1.1", + "Werkzeug==2.3.7", + ], + }, +} +MAP_VERSION_TO_INSTALL_FLASK.update( + { + k: { + "python": "3.11", + "packages": "requirements.txt", + "install": "pip install -e .", + "pip_packages": [ + "click==8.1.3", + "itsdangerous==2.1.2", + "Jinja2==3.1.2", + "MarkupSafe==2.1.1", + "Werkzeug==2.3.7", + ], + } + for k in ["2.2", "2.3"] + } +) + +MAP_VERSION_TO_INSTALL_DJANGO = { + k: { + "python": "3.5", + "packages": "requirements.txt", + "install": "python -m pip install -e .", + } + for k in ["1.7", "1.8", "1.9", "1.10", "1.11", "2.0", "2.1", "2.2"] +} +MAP_VERSION_TO_INSTALL_DJANGO.update( + { + k: {"python": "3.5", "install": "python setup.py install"} + for k in ["1.4", "1.5", "1.6"] + } +) +MAP_VERSION_TO_INSTALL_DJANGO.update( + { + k: { + "python": "3.6", + "packages": "requirements.txt", + "install": "python -m pip install -e .", + } + for k in ["3.0", "3.1", "3.2"] + } +) +MAP_VERSION_TO_INSTALL_DJANGO.update( + { + k: { + "python": "3.8", + "packages": "requirements.txt", + "install": "python -m pip install -e .", + } + for k in ["4.0"] + } +) +MAP_VERSION_TO_INSTALL_DJANGO.update( + { + k: { + "python": "3.9", + "packages": "requirements.txt", + "install": "python -m pip install -e .", + } + for k in ["4.1", "4.2"] + } +) +MAP_VERSION_TO_INSTALL_DJANGO.update( + { + k: { + "python": "3.11", + "packages": "requirements.txt", + "install": "python -m pip install -e .", + } + for k in ["5.0"] + } +) +for k in ["2.2", "3.0", "3.1"]: + MAP_VERSION_TO_INSTALL_DJANGO[k].update( + {"env_vars_test": {"LANG": "en_US.UTF-8", "LC_ALL": "en_US.UTF-8"}} + ) + +MAP_VERSION_TO_INSTALL_REQUESTS = { + k: {"python": "3.9", "packages": "pytest", "install": "python -m pip install ."} + for k in ["0.7", "0.8", "0.9", "0.11", "0.13", 
"0.14", "1.1", "1.2", "2.0", "2.2"] + + ["2.3", "2.4", "2.5", "2.7", "2.8", "2.9", "2.10", "2.11", "2.12", "2.17"] + + ["2.18", "2.19", "2.22", "2.26", "2.25", "2.27", "3.0"] +} + +MAP_VERSION_TO_INSTALL_SEABORN = { + k: { + "python": "3.9", + "install": "pip install -e .", + "pip_packages": [ + "contourpy==1.1.0", + "cycler==0.11.0", + "fonttools==4.42.1", + "importlib-resources==6.0.1", + "kiwisolver==1.4.5", + "matplotlib==3.7.2", + "numpy==1.25.2", + "packaging==23.1", + "pandas==2.1.0", + "pillow==10.0.0", + "pyparsing==3.0.9", + "pytest", + "python-dateutil==2.8.2", + "pytz==2023.3.post1", + "scipy==1.11.2", + "six==1.16.0", + "tzdata==2023.1", + "zipp==3.16.2", + ], + } + for k in ["0.11"] +} +MAP_VERSION_TO_INSTALL_SEABORN.update( + { + k: { + "python": "3.9", + "install": "pip install -e .[dev]", + "pip_packages": [ + "contourpy==1.1.0", + "cycler==0.11.0", + "fonttools==4.42.1", + "importlib-resources==6.0.1", + "kiwisolver==1.4.5", + "matplotlib==3.7.2", + "numpy==1.25.2", + "packaging==23.1", + "pandas==2.1.0", + "pillow==10.0.0", + "pyparsing==3.0.9", + "python-dateutil==2.8.2", + "pytz==2023.3.post1", + "scipy==1.11.2", + "six==1.16.0", + "tzdata==2023.1", + "zipp==3.16.2", + ], + } + for k in ["0.12", "0.13"] + } +) + +MAP_VERSION_TO_INSTALL_PYTEST = { + k: {"python": "3.9", "install": "pip install -e ."} + for k in [ + "4.4", + "4.5", + "4.6", + "5.0", + "5.1", + "5.2", + "5.3", + "5.4", + "6.0", + "6.2", + "6.3", + "7.0", + "7.1", + "7.2", + "7.4", + "8.0", + ] +} +MAP_VERSION_TO_INSTALL_PYTEST["4.4"]["pip_packages"] = [ + "atomicwrites==1.4.1", + "attrs==23.1.0", + "more-itertools==10.1.0", + "pluggy==0.13.1", + "py==1.11.0", + "setuptools==68.0.0", + "six==1.16.0", +] +MAP_VERSION_TO_INSTALL_PYTEST["4.5"]["pip_packages"] = [ + "atomicwrites==1.4.1", + "attrs==23.1.0", + "more-itertools==10.1.0", + "pluggy==0.11.0", + "py==1.11.0", + "setuptools==68.0.0", + "six==1.16.0", + "wcwidth==0.2.6", +] +MAP_VERSION_TO_INSTALL_PYTEST["4.6"]["pip_packages"] 
= [ + "atomicwrites==1.4.1", + "attrs==23.1.0", + "more-itertools==10.1.0", + "importlib-metadata==1.6.0", + "packaging==23.1", + "pluggy==0.13.1", + "py==1.11.0", + "six==1.16.0", + "wcwidth==0.2.6", +] +for k in ["5.0", "5.1", "5.2"]: + MAP_VERSION_TO_INSTALL_PYTEST[k]["pip_packages"] = [ + "atomicwrites==1.4.1", + "attrs==23.1.0", + "more-itertools==10.1.0", + "packaging==23.1", + "pluggy==0.13.1", + "py==1.11.0", + "wcwidth==0.2.6", + ] +MAP_VERSION_TO_INSTALL_PYTEST["5.3"]["pip_packages"] = [ + "attrs==23.1.0", + "more-itertools==10.1.0", + "packaging==23.1", + "pluggy==0.13.1", + "py==1.11.0", + "wcwidth==0.2.6", +] +MAP_VERSION_TO_INSTALL_PYTEST["5.4"]["pip_packages"] = [ + "py==1.11.0", + "packaging==23.1", + "attrs==23.1.0", + "more-itertools==10.1.0", + "pluggy==0.13.1", + "wcwidth==0.2.6", +] +MAP_VERSION_TO_INSTALL_PYTEST["5.4"]["pre_test"] = ["pip install -e ."] +MAP_VERSION_TO_INSTALL_PYTEST["6.0"]["pip_packages"] = [ + "attrs==23.1.0", + "iniconfig==2.0.0", + "more-itertools==10.1.0", + "packaging==23.1", + "pluggy==0.13.1", + "py==1.11.0", + "toml==0.10.2", +] +for k in ["6.2", "6.3"]: + MAP_VERSION_TO_INSTALL_PYTEST[k]["pip_packages"] = [ + "attrs==23.1.0", + "iniconfig==2.0.0", + "packaging==23.1", + "pluggy==0.13.1", + "py==1.11.0", + "toml==0.10.2", + ] +MAP_VERSION_TO_INSTALL_PYTEST["7.0"]["pip_packages"] = [ + "attrs==23.1.0", + "iniconfig==2.0.0", + "packaging==23.1", + "pluggy==0.13.1", + "py==1.11.0", +] +for k in ["7.1", "7.2"]: + MAP_VERSION_TO_INSTALL_PYTEST[k]["pip_packages"] = [ + "attrs==23.1.0", + "iniconfig==2.0.0", + "packaging==23.1", + "pluggy==0.13.1", + "py==1.11.0", + "tomli==2.0.1", + ] +MAP_VERSION_TO_INSTALL_PYTEST["7.4"]["pip_packages"] = [ + "iniconfig==2.0.0", + "packaging==23.1", + "pluggy==1.3.0", + "exceptiongroup==1.1.3", + "tomli==2.0.1", +] +MAP_VERSION_TO_INSTALL_PYTEST["8.0"]["pip_packages"] = [ + "iniconfig==2.0.0", + "packaging==23.1", + "pluggy==1.3.0", + "exceptiongroup==1.1.3", + "tomli==2.0.1", +] + 
+MAP_VERSION_TO_INSTALL_MATPLOTLIB = { + k: { + "python": "3.11", + "packages": "environment.yml", + "install": "python -m pip install -e .", + "pip_packages": [ + "contourpy==1.1.0", + "cycler==0.11.0", + "fonttools==4.42.1", + "kiwisolver==1.4.5", + "numpy==1.25.2", + "packaging==23.1", + "pillow==10.0.0", + "pyparsing==3.0.9", + "python-dateutil==2.8.2", + "six==1.16.0", + "setuptools==66.1.1", + "setuptools-scm==7.1.0", + "typing-extensions==4.7.1", + ], + "arch_specific_packages": { + "aarch64": "gxx_linux-aarch64 gcc_linux-aarch64 make", + }, + } + for k in ["3.5", "3.6", "3.7"] +} + +MAP_VERSION_TO_INSTALL_MATPLOTLIB.update( + { + k: { + "python": "3.8", + "packages": "requirements.txt", + "install": "python -m pip install -e .", + "pip_packages": [], + "arch_specific_packages": { + "aarch64": "gxx_linux-aarch64 gcc_linux-aarch64 make", + }, + } + for k in ["3.1", "3.2", "3.3", "3.4"] + } +) +MAP_VERSION_TO_INSTALL_MATPLOTLIB.update( + { + k: { + "python": "3.7", + "packages": "requirements.txt", + "install": "python -m pip install -e .", + "pip_packages": ["freetype"], + "arch_specific_packages": { + "aarch64": "gxx_linux-aarch64 gcc_linux-aarch64 make", + }, + } + for k in ["3.0"] + } +) +MAP_VERSION_TO_INSTALL_MATPLOTLIB.update( + { + k: { + "python": "3.5", + "install": "python setup.py build; python setup.py install", + "arch_specific_packages": { + "aarch64": "gxx_linux-aarch64 gcc_linux-aarch64 make", + }, + } + for k in ["2.0", "2.1", "2.2", "1.0", "1.1", "1.2", "1.3", "1.4", "1.5"] + } +) + +MAP_VERSION_TO_INSTALL_SPHINX = { + k: { + "python": "3.9", + "pip_packages": ["tox"], + "install": "pip install -e .[test]", + "pre_install": ["sed -i 's/pytest/pytest -rA/' tox.ini"], + "arch_specific_packages": { + "aarch64": "gxx_linux-aarch64 gcc_linux-aarch64 make", + "x86_64": "gxx_linux-64 gcc_linux-64 make", + }, + } + for k in ["1.5", "1.6", "1.7", "1.8", "2.0", "2.1", "2.2", "2.3", "2.4", "3.0"] + + ["3.1", "3.2", "3.3", "3.4", "3.5", "4.0", "4.1", 
"4.2", "4.3", "4.4"] + + ["4.5", "5.0", "5.1", "5.2", "5.3", "6.0", "6.2", "7.0", "7.1", "7.2"] +} +for k in ["3.0", "3.1", "3.2", "3.3", "3.4", "3.5", "4.0", "4.1", "4.2", "4.3", "4.4"]: + MAP_VERSION_TO_INSTALL_SPHINX[k]["pre_install"].extend( + [ + "sed -i 's/Jinja2>=2.3/Jinja2<3.0/' setup.py", + "sed -i 's/sphinxcontrib-applehelp/sphinxcontrib-applehelp<=1.0.7/' setup.py", + "sed -i 's/sphinxcontrib-devhelp/sphinxcontrib-devhelp<=1.0.5/' setup.py", + "sed -i 's/sphinxcontrib-qthelp/sphinxcontrib-qthelp<=1.0.6/' setup.py", + "sed -i 's/alabaster>=0.7,<0.8/alabaster>=0.7,<0.7.12/' setup.py", + "sed -i \"s/'packaging',/'packaging', 'markupsafe<=2.0.1',/\" setup.py", + ] + ) + if k in ["4.2", "4.3", "4.4"]: + MAP_VERSION_TO_INSTALL_SPHINX[k]["pre_install"].extend( + [ + "sed -i 's/sphinxcontrib-htmlhelp>=2.0.0/sphinxcontrib-htmlhelp>=2.0.0,<=2.0.4/' setup.py", + "sed -i 's/sphinxcontrib-serializinghtml>=1.1.5/sphinxcontrib-serializinghtml>=1.1.5,<=1.1.9/' setup.py", + ] + ) + elif k == "4.1": + MAP_VERSION_TO_INSTALL_SPHINX[k]["pre_install"].extend( + [ + ( + "grep -q 'sphinxcontrib-htmlhelp>=2.0.0' setup.py && " + "sed -i 's/sphinxcontrib-htmlhelp>=2.0.0/sphinxcontrib-htmlhelp>=2.0.0,<=2.0.4/' setup.py || " + "sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp<=2.0.4/' setup.py" + ), + ( + "grep -q 'sphinxcontrib-serializinghtml>=1.1.5' setup.py && " + "sed -i 's/sphinxcontrib-serializinghtml>=1.1.5/sphinxcontrib-serializinghtml>=1.1.5,<=1.1.9/' setup.py || " + "sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml<=1.1.9/' setup.py" + ), + ] + ) + else: + MAP_VERSION_TO_INSTALL_SPHINX[k]["pre_install"].extend( + [ + "sed -i 's/sphinxcontrib-htmlhelp/sphinxcontrib-htmlhelp<=2.0.4/' setup.py", + "sed -i 's/sphinxcontrib-serializinghtml/sphinxcontrib-serializinghtml<=1.1.9/' setup.py", + ] + ) + +for spec in MAP_VERSION_TO_INSTALL_SPHINX.values(): + spec["pre_test"] = spec["pre_install"] + +MAP_VERSION_TO_INSTALL_ASTROPY = { + k: { + "python": 
"3.9", + "install": "pip install -e .[test]", + "pre_install": ["pip install setuptools==68.0.0"], + "pip_packages": [ + "attrs==23.1.0", + "exceptiongroup==1.1.3", + "execnet==2.0.2", + "hypothesis==6.82.6", + "iniconfig==2.0.0", + "numpy==1.23.4", + "packaging==23.1", + "pluggy==1.3.0", + "psutil==5.9.5", + "pyerfa==2.0.0.3", + "pytest-arraydiff==0.5.0", + "pytest-astropy-header==0.2.2", + "pytest-astropy==0.10.0", + "pytest-cov==4.1.0", + "pytest-doctestplus==1.0.0", + "pytest-filter-subpackage==0.1.2", + "pytest-mock==3.11.1", + "pytest-openfiles==0.5.0", + "pytest-remotedata==0.4.0", + "pytest-xdist==3.3.1", + "pytest==7.4.0", + "PyYAML==6.0.1", + "sortedcontainers==2.4.0", + "tomli==2.0.1", + ], + } + for k in ["0.1", "0.2", "0.3", "0.4", "1.1", "1.2", "1.3", "3.0", "3.1", "3.2"] + + ["4.1", "4.2", "4.3", "5.0", "5.1", "5.2"] +} + +MAP_VERSION_TO_INSTALL_SYMPY = { + k: { + "python": "3.9", + "packages": "mpmath flake8", + "pip_packages": ["mpmath==1.3.0", "flake8-comprehensions"], + "install": "pip install -e .", + } + for k in ["1.10", "1.11", "1.12", "1.2", "1.4", "1.5", "1.6"] + + ["1.7", "1.8", "1.9"] +} + +MAP_VERSION_TO_INSTALL_SYMPY.update( + { + k: { + "python": "3.7", + "packages": "mpmath flake8", + "pip_packages": ["mpmath==1.3.0", "flake8-comprehensions"], + "install": "pip install -e .", + } + for k in ["0.7", "1.0", "1.1"] + } +) + +MAP_VERSION_TO_INSTALL_SYMPY.update( + { + k: { + "python": "3.9", + "packages": "requirements.txt", + "install": "pip install -e .", + "pip_packages": ["mpmath==1.3.0"], + } + for k in ["1.13"] + } +) + +MAP_VERSION_TO_INSTALL_PYLINT = { + k: { + "python": "3.9", + "packages": "requirements.txt", + "install": "pip install -e .", + "pip_packages": ["pytest"], + } + for k in [ + "2.10", + "2.11", + "2.13", + "2.14", + "2.15", + "2.16", + "2.17", + "2.8", + "2.9", + "3.0", + ] +} + +MAP_VERSION_TO_INSTALL_PYLINT["2.15"]["pre_test"] = ["pip install -e ."] + +MAP_VERSION_TO_INSTALL_PYLINT.update( + { + k: 
{**MAP_VERSION_TO_INSTALL_PYLINT[k], "pip_packages": ["toml", "pytest"]} + for k in ["2.13"] + } +) + +MAP_VERSION_TO_INSTALL_PYLINT.update( + { + k: { + **MAP_VERSION_TO_INSTALL_PYLINT[k], + "pip_packages": ["astroid==3.0.0a6", "pytest"], + } + for k in ["3.0"] + } +) + +MAP_VERSION_TO_INSTALL_XARRAY = { + k: { + "python": "3.10", + "packages": "environment.yml", + "install": "pip install -e .", + "pip_packages": [ + "numpy==1.25.2", + "packaging==23.1", + "pandas==1.5.3", + "pytest==8.1.1", + "python-dateutil==2.8.2", + "pytz==2023.3", + "six==1.16.0", + ], + "no_use_env": True, + } + for k in ["0.12", "0.18", "0.19", "0.20", "2022.03", "2022.06", "2022.09"] +} + +MAP_VERSION_TO_INSTALL_SQLFLUFF = { + k: { + "python": "3.9", + "packages": "requirements.txt", + "install": "pip install -e .", + } + for k in [ + "0.10", + "0.11", + "0.12", + "0.13", + "0.4", + "0.6", + "0.8", + "0.9", + "1.1", + "1.2", + "1.3", + "1.4", + "2.0", + "2.1", + "2.2", + ] +} + +MAP_VERSION_TO_INSTALL_PYVISTA = { + k: { + "python": "3.9", + "install": "pip install -e .", + "pip_packages": ["pytest"], + } + for k in ["0.20", "0.21", "0.22", "0.23"] +} +MAP_VERSION_TO_INSTALL_PYVISTA.update( + { + k: { + "python": "3.9", + "packages": "requirements.txt", + "install": "pip install -e .", + "pip_packages": ["pytest"], + } + for k in [ + "0.24", + "0.25", + "0.26", + "0.27", + "0.28", + "0.29", + "0.30", + "0.31", + "0.32", + "0.33", + "0.34", + "0.35", + "0.36", + "0.37", + "0.38", + "0.39", + "0.40", + "0.41", + "0.42", + "0.43", + ] + } +) + +MAP_VERSION_TO_INSTALL_ASTROID = { + k: { + "python": "3.9", + "install": "pip install -e .", + "pip_packages": ["pytest"], + } + for k in ["2.10", "2.12", "2.13", "2.14", "2.15", "2.5", "2.6", "2.7", "2.9", "3.0"] +} +for k in ["2.5", "2.6"]: + MAP_VERSION_TO_INSTALL_ASTROID[k]["pip_packages"].extend( + ["lazy_object_proxy==1.9.0", "wrapt==1.12.1"] + ) +for k in ["2.9", "2.10"]: + MAP_VERSION_TO_INSTALL_ASTROID[k]["pip_packages"].extend( + [ + 
"lazy_object_proxy==1.9.0", + "wrapt==1.13.3", + "typing-extensions==4.8.0", + "setuptools==68.0.0", + ] + ) +for k in ["2.12", "2.13", "2.14", "2.15"]: + MAP_VERSION_TO_INSTALL_ASTROID[k]["pip_packages"].extend( + ["lazy_object_proxy==1.9.0", "wrapt==1.15.0", "typing-extensions==4.8.0"] + ) +MAP_VERSION_TO_INSTALL_ASTROID["2.7"]["pip_packages"].extend( + ["lazy_object_proxy==1.9.0", "wrapt==1.12.1", "typing-extensions==4.8.0"] +) +MAP_VERSION_TO_INSTALL_ASTROID["3.0"]["pip_packages"].append("typing-extensions==4.8.0") + + +MAP_VERSION_TO_INSTALL_MARSHMALLOW = { + k: { + "python": "3.9", + "install": "pip install -e '.[dev]'", + } + for k in ["2.18", "2.19", "2.20", "3.0", "3.12", "3.19", "3.9"] +} + +MAP_VERSION_TO_INSTALL_PVLIB = { + k: { + "python": "3.9", + "install": "pip install -e .[all]", + "packages": "pandas scipy", + "pip_packages": ["jupyter", "ipython", "matplotlib", "pytest", "flake8"], + } + for k in ["0.5", "0.6", "0.7", "0.8", "0.9"] +} + +MAP_VERSION_TO_INSTALL_PYDICOM = { + k: { + "python": "3.6", + "install": "pip install -e .", + "packages": "numpy", + "pip_packages": ["pytest"], + } + for k in ["1.2", "1.3", "1.4", "2.0", "2.1", "2.2", "2.3"] +} +MAP_VERSION_TO_INSTALL_PYDICOM.update( + { + k: { + **MAP_VERSION_TO_INSTALL_PYDICOM[k], + "python": "3.8", + "pip_packages": ["pytest==4.6.11"], + } + for k in ["1.4", "2.0"] + } +) +MAP_VERSION_TO_INSTALL_PYDICOM.update( + { + k: { + **MAP_VERSION_TO_INSTALL_PYDICOM[k], + "python": "3.9", + "pip_packages": ["pytest==4.6.11"], + } + for k in ["2.1", "2.2"] + } +) +MAP_VERSION_TO_INSTALL_PYDICOM.update( + {k: {**MAP_VERSION_TO_INSTALL_PYDICOM[k], "python": "3.10"} for k in ["2.3"]} +) + +MAP_VERSION_TO_INSTALL_HUMANEVAL = {k: {"python": "3.9"} for k in ["1.0"]} + +# Constants - Task Instance Installation Environment +MAP_VERSION_TO_INSTALL = { + "astropy/astropy": MAP_VERSION_TO_INSTALL_ASTROPY, + "django/django": MAP_VERSION_TO_INSTALL_DJANGO, + "matplotlib/matplotlib": 
MAP_VERSION_TO_INSTALL_MATPLOTLIB, + "marshmallow-code/marshmallow": MAP_VERSION_TO_INSTALL_MARSHMALLOW, + "mwaskom/seaborn": MAP_VERSION_TO_INSTALL_SEABORN, + "pallets/flask": MAP_VERSION_TO_INSTALL_FLASK, + "psf/requests": MAP_VERSION_TO_INSTALL_REQUESTS, + "pvlib/pvlib-python": MAP_VERSION_TO_INSTALL_PVLIB, + "pydata/xarray": MAP_VERSION_TO_INSTALL_XARRAY, + "pydicom/pydicom": MAP_VERSION_TO_INSTALL_PYDICOM, + "pylint-dev/astroid": MAP_VERSION_TO_INSTALL_ASTROID, + "pylint-dev/pylint": MAP_VERSION_TO_INSTALL_PYLINT, + "pytest-dev/pytest": MAP_VERSION_TO_INSTALL_PYTEST, + "pyvista/pyvista": MAP_VERSION_TO_INSTALL_PYVISTA, + "scikit-learn/scikit-learn": MAP_VERSION_TO_INSTALL_SKLEARN, + "sphinx-doc/sphinx": MAP_VERSION_TO_INSTALL_SPHINX, + "sqlfluff/sqlfluff": MAP_VERSION_TO_INSTALL_SQLFLUFF, + "swe-bench/humaneval": MAP_VERSION_TO_INSTALL_HUMANEVAL, + "sympy/sympy": MAP_VERSION_TO_INSTALL_SYMPY, +} + +# Constants - Repository Specific Installation Instructions +MAP_REPO_TO_INSTALL = {} + +# Constants - Task Instance Test Frameworks +TEST_PYTEST = "pytest --no-header -rA --tb=no -p no:cacheprovider" +TEST_PYTEST_SKIP_NO_HEADER = "pytest -rA --tb=no -p no:cacheprovider" +MAP_REPO_TO_TEST_FRAMEWORK = { + "astropy/astropy": TEST_PYTEST, + "django/django": "./tests/runtests.py --verbosity 2", + "marshmallow-code/marshmallow": TEST_PYTEST, + "matplotlib/matplotlib": TEST_PYTEST, + "mwaskom/seaborn": "pytest --no-header -rA", + "pallets/flask": TEST_PYTEST, + "psf/requests": TEST_PYTEST, + "pvlib/pvlib-python": TEST_PYTEST, + "pydata/xarray": TEST_PYTEST, + "pydicom/pydicom": TEST_PYTEST_SKIP_NO_HEADER, + "pylint-dev/astroid": TEST_PYTEST, + "pylint-dev/pylint": TEST_PYTEST, + "pytest-dev/pytest": "pytest -rA", + "pyvista/pyvista": TEST_PYTEST, + "scikit-learn/scikit-learn": TEST_PYTEST_SKIP_NO_HEADER, + "sphinx-doc/sphinx": "tox -epy39 -v --", + "sqlfluff/sqlfluff": TEST_PYTEST, + "swe-bench/humaneval": "python", + "sympy/sympy": "bin/test -C --verbose", +} + +# 
Constants - Task Instance Requirements File Paths +MAP_REPO_TO_REQS_PATHS = { + "django/django": ["tests/requirements/py3.txt"], + "matplotlib/matplotlib": [ + "requirements/dev/dev-requirements.txt", + "requirements/testing/travis_all.txt", + ], + "pallets/flask": ["requirements/dev.txt"], + "pylint-dev/pylint": ["requirements_test.txt"], + "pyvista/pyvista": ["requirements_test.txt", "requirements.txt"], + "sqlfluff/sqlfluff": ["requirements_dev.txt"], + "sympy/sympy": ["requirements-dev.txt"], +} + +# Constants - Task Instance environment.yml File Paths +MAP_REPO_TO_ENV_YML_PATHS = { + "matplotlib/matplotlib": ["environment.yml"], + "pydata/xarray": ["ci/requirements/environment.yml", "environment.yml"], +} + +MAP_REPO_TO_DEB_PACKAGES = { + "matplotlib/matplotlib": [ + "texlive", + "texlive-xetex", + "dvipng", + "ghostscript", + "libfreetype-dev", + "libtiff-dev", + ], + "pyvista/pyvista": ["libgl1", "libxrender1"], +} + +# Constants - Evaluation Keys +KEY_INSTANCE_ID = "instance_id" +KEY_MODEL = "model_name_or_path" +KEY_PREDICTION = "model_patch" + +# Constants - Logging +APPLY_PATCH_FAIL = ">>>>> Patch Apply Failed" +APPLY_PATCH_PASS = ">>>>> Applied Patch" +INSTALL_FAIL = ">>>>> Init Failed" +INSTALL_PASS = ">>>>> Init Succeeded" +INSTALL_TIMEOUT = ">>>>> Init Timed Out" +RESET_FAILED = ">>>>> Reset Failed" +TESTS_ERROR = ">>>>> Tests Errored" +TESTS_FAILED = ">>>>> Some Tests Failed" +TESTS_PASSED = ">>>>> All Tests Passed" +TESTS_TIMEOUT = ">>>>> Tests Timed Out" + + +# Constants - Patch Types +class PatchType(Enum): + PATCH_GOLD = "gold" + PATCH_PRED = "pred" + PATCH_PRED_TRY = "pred_try" + PATCH_PRED_MINIMAL = "pred_minimal" + PATCH_PRED_MINIMAL_TRY = "pred_minimal_try" + PATCH_TEST = "test" + + def __str__(self): + return self.value + + +# Constants - Miscellaneous +NON_TEST_EXTS = [ + ".json", + ".png", + "csv", + ".txt", + ".md", + ".jpg", + ".jpeg", + ".pkl", + ".yml", + ".yaml", + ".toml", +] +SWE_BENCH_URL_RAW = "https://raw.githubusercontent.com/" 
diff --git a/python/swe/swe_bench_docker/docker_file_generator/context_manager.py b/python/swe/swe_bench_docker/docker_file_generator/context_manager.py new file mode 100644 index 0000000000..c8a7c264ed --- /dev/null +++ b/python/swe/swe_bench_docker/docker_file_generator/context_manager.py @@ -0,0 +1,346 @@ +import json +import logging +import os +import subprocess +from logging import DEBUG, ERROR, INFO, Logger +from traceback import format_exc + +from swebench_docker.constants import ( + APPLY_PATCH_FAIL, + APPLY_PATCH_PASS, + INSTALL_FAIL, + KEY_INSTANCE_ID, + KEY_MODEL, + MAP_VERSION_TO_INSTALL, + PatchType, + TESTS_ERROR, + TESTS_FAILED, + TESTS_PASSED, + TESTS_TIMEOUT, +) + + +logger_taskenv = logging.getLogger("taskenv") + + +class LogWrapper: + def __init__( + self, + log_file: str, + logger: Logger = None, + prefix: str = None, + ): + self.log_file = log_file + self.logger = logger + self.prefix = prefix + + def write(self, message: str, mode: str = "a", level: int = INFO): + with open(self.log_file, mode, encoding="utf-8") as f: + log = ( + f"{self.prefix} {message} \n" + if self.prefix is not None + else f"{message} \n" + ) + f.write(log) + if self.logger is not None: + self.logger.log(level, message) + + +class ExecWrapper: + def __init__( + self, + subprocess_args: dict = None, + logger: LogWrapper = None, + ): + self.logger = logger + if subprocess_args is None: + self.subprocess_args = {} + else: + self.subprocess_args = subprocess_args + + def __call__(self, cmd, raise_error=True, **kwargs): + try: + if isinstance(cmd, list): + self.logger.write(f"Command: {' '.join(cmd)}", level=DEBUG) + else: + self.logger.write(f"Command: {cmd}", level=DEBUG) + combined_args = {**self.subprocess_args, **kwargs} + self.logger.write( + f"Subprocess args: {json.dumps(combined_args)}", level=DEBUG + ) + output = subprocess.run(cmd, **combined_args, check=False) + self.logger.write(f"Std. 
Output:\n{output.stdout}", level=DEBUG) + if output.stderr: + self.logger.write(f"Std. Error:\n{output.stderr}", level=DEBUG) + self.logger.write(f"Return Code: {output.returncode}", level=DEBUG) + return output + except subprocess.CalledProcessError as e: + if raise_error and self.logger is not None: + self.logger.write(f"Error: {e}", level=ERROR) + self.logger.write(f"Error stdout: {e.stdout}", level=ERROR) + if e.stderr: + self.logger.write(f"Error stderr: {e.stderr}", level=ERROR) + self.logger.write(f"Error traceback: {format_exc()}", level=ERROR) + raise e + + +class TaskEnvContextManager: + def __init__( + self, + task_instance: dict, + testbed_name: str, + repo_dir: str, + log_dir: str, + log_suffix: str = None, + timeout: int = None, + is_eval: bool = True, + image_type: str = "conda", + ): + self.instance_id = task_instance[KEY_INSTANCE_ID] + self.instance = task_instance + self.testbed_name = testbed_name + self.repo_dir = repo_dir + self.cwd = os.getcwd() + self.is_eval = is_eval + self.image_type = image_type + + model = task_instance[KEY_MODEL] + if image_type == "conda": + self.cmd_conda_run = f"conda run -n {testbed_name} " + else: + self.cmd_conda_run = "" + + self.timeout = timeout + + log_file_name = f"{self.instance_id}.{model}.eval.log" + + if log_suffix: + log_file_name = f"{self.instance_id}.{model}.{log_suffix}.eval.log" + + self.log_file = os.path.join(log_dir, log_file_name) + self.log = LogWrapper( + self.log_file, + logger=logger_taskenv, + prefix=f"[{testbed_name}] [{self.instance_id}]", + ) + + self.exec = ExecWrapper( + subprocess_args={ + "cwd": self.repo_dir, + "check": True, + "shell": False, + # "capture_output": False, + "universal_newlines": True, + "stdout": subprocess.PIPE, + "stderr": subprocess.STDOUT, + }, + logger=self.log, + ) + + def __enter__(self): + """ + Enter task environment, set up log file + """ + os.chdir(self.repo_dir) + enter_msg = ( + f"Task Metadata:" + f"\n\t- Instance ID: {self.instance[KEY_INSTANCE_ID]}" 
+ f"\n\t- Testbed: {self.testbed_name}" + ) + if self.is_eval: + enter_msg += f"\n\t- Evaluation Model: {self.instance[KEY_MODEL]}" + + output = self.exec("python --version".split()) + enter_msg += f"\n\t- Python version: {output.stdout}" + + self.log.write(enter_msg, mode="w") + + self.exec( + f"git config --global --add safe.directory {self.repo_dir}".split(" ") + ) + self.exec( + f"git -c advice.detachedHead=false checkout {self.instance['base_commit']}".split( + " " + ) + ) + + specifications = MAP_VERSION_TO_INSTALL[self.instance["repo"]][ + self.instance["version"] + ] + if "pre_test" in specifications: + for cmd_pre_install in specifications["pre_test"]: + self.log.write(f"Running pre-test command: {cmd_pre_install}") + cmd_pre_install = f"{self.cmd_conda_run} {cmd_pre_install}" + + out_pre_install = self.exec( + cmd_pre_install, timeout=self.timeout, shell=True + ) + with open(self.log_file, "a", encoding="utf-8") as f: + f.write(f"Pre-installation Command: {cmd_pre_install}\n") + f.write(f"Std. Output: {out_pre_install.stdout}\n") + if out_pre_install.stderr: + f.write(f"Std. Error: {out_pre_install.stderr}\n") + if out_pre_install.returncode != 0: + self.log.write("Pre-install setup failed", level=ERROR) + with open(self.log_file, "a", encoding="utf-8") as f: + f.write(f"\n{INSTALL_FAIL}\n") + return False + + return self + + def apply_patch( + self, patch: str, patch_type: PatchType = "", revert: bool = False + ) -> bool: + """ + Apply patch to task environment + + Args: + patch (str): Plaintext of patch to apply + patch_type (str): Type of patch (e.g. 
"eval", "test") + Returns: + bool: True if patch applied successfully, False otherwise + """ + init_diff_patch_path = os.path.join( + os.path.dirname(self.repo_dir.rstrip("/")), + f"temp_{self.instance_id}_{patch_type}_init.patch", + ) + self.exec(f"git diff > {init_diff_patch_path}", shell=True) + + # If patch is `None`, indicate in log and skip + if patch is None: + self.log.write(f"Patch is `None` ({patch_type})") + with open(self.log_file, "a", encoding="utf-8") as f: + f.write(f"{APPLY_PATCH_FAIL}; Prediction patch is `None`") + return False + + # Write patch to temporary patch file in parent directory + patch_path = os.path.join( + os.path.dirname(self.repo_dir.rstrip("/")), + f"temp_{self.instance_id}_{patch_type}.patch", + ) + + with open(patch_path, "w", encoding="utf-8") as f: + f.write(patch) + + # Restore test files before applying if patch_type is 'test' + if patch_type == PatchType.PATCH_TEST.value: + for test in self.instance["test_directives"]: + if os.path.exists(test): + self.exec(f"git restore {test}".split(" ")) + + # Apply patch to testbed directory + apply_cmd = ( + f"git apply -v -R {patch_path}" if revert else f"git apply -v {patch_path}" + ) + out_patch = self.exec(apply_cmd.split(" "), raise_error=False, check=False) + + # If git command fails, try patch command + if out_patch.returncode != 0: + # Patch may has been partially applied so we should revert it. 
+ # NOTE: we do not revert the test patch because it may unintentionally revert previously applied patches + if patch_type != PatchType.PATCH_TEST.value: + self.exec("git restore .".split(" ")) + # revert to the state of the repo before the patch was applied + output = self.exec( + f"git apply {init_diff_patch_path}".split(), + raise_error=False, + check=False, + ) + self.log.write( + f"Output (git apply - revert to initial state): {output.stdout}" + ) + apply_cmd = ( + f"patch -R --batch --fuzz=5 -p1 -i {patch_path}" + if revert + else f"patch --batch --fuzz=5 -p1 -i {patch_path}" + ) + out_patch = self.exec(apply_cmd.split(" "), raise_error=False, check=False) + + # TODO os.remove(patch_path) + + log_cmd = "Revert" if revert else "Apply" + if out_patch.returncode != 0: + # Patch apply failed + self.log.write(f"{log_cmd} patch failed ({patch_type})", level=ERROR) + with open(self.log_file, "a", encoding="utf-8") as f: + f.write(f"{APPLY_PATCH_FAIL}; ({patch_type})\nOutput:\n") + f.write(out_patch.stdout) + if out_patch.stderr: + f.write(out_patch.stderr) + if ( + patch_type != PatchType.PATCH_TEST.value + and "patching" in out_patch.stdout + ): + # Patch has been partially applied so we should revert it. 
+ self.exec("git restore .".split(" ")) + # revert to the state of the repo before the patch was applied + output = self.exec( + f"git apply {init_diff_patch_path}".split(), + raise_error=False, + check=False, + ) + self.log.write( + f"Output (git apply - revert to initial state): {output.stdout}" + ) + return False + + # Patch apply succeeded + self.log.write(f"{log_cmd} patch successful ({patch_type})") + with open(self.log_file, "a", encoding="utf-8") as f: + f.write(f"{APPLY_PATCH_PASS} ({patch_type})\n") + return True + + def run_tests_task(self, instance: dict): + """ + Run tests for task instance + + Args: + instance (dict): Task instance + Returns: + bool: True if test script ran successfully, False otherwise + """ + try: + # Run test command for task instance + specifications = MAP_VERSION_TO_INSTALL[self.instance["repo"]][ + self.instance["version"] + ] + if "image" in specifications and specifications["image"] == "python": + test_cmd = instance["test_cmd"] + else: + test_cmd = f"{self.cmd_conda_run} {instance['test_cmd']}" + + with open(self.log_file, "a", encoding="utf-8") as f: + f.write(f"Test Script: {test_cmd};\n") + + out_test = self.exec( + test_cmd.split(), shell=False, timeout=self.timeout, check=False + ) + + # Write pass/fail status to log file + with open(self.log_file, "a", encoding="utf-8") as f: + if out_test.returncode != 0: + f.write(f"\n{TESTS_FAILED}\n") + else: + f.write(f"\n{TESTS_PASSED}\n") + + self.log.write("Test script run successful") + return True + except subprocess.TimeoutExpired: + # Test command run timed out + self.log.write("Test script run timed out", level=ERROR) + with open(self.log_file, "a", encoding="utf-8") as f: + f.write(f"{TESTS_TIMEOUT} after {self.timeout} seconds\n") + return False + except Exception as e: + # Test command run failed + self.log.write("Test script run failed", level=ERROR) + with open(self.log_file, "a", encoding="utf-8") as f: + f.write(f"{TESTS_ERROR}: {e}") + return False + + def 
__exit__(self, exc_type, exc_value, exc_traceback): + os.chdir(self.cwd) + try: + os.chmod(self.log_file, 0o666) + except Exception as e: + self.log.write(f"Error changing file permissions: {e}", level=ERROR) diff --git a/python/swe/swe_bench_docker/docker_file_generator/docker_file_generator.py b/python/swe/swe_bench_docker/docker_file_generator/docker_file_generator.py new file mode 100644 index 0000000000..bccfcaf2b1 --- /dev/null +++ b/python/swe/swe_bench_docker/docker_file_generator/docker_file_generator.py @@ -0,0 +1,448 @@ +import logging +import os +from typing import List + +from jinja2 import Environment, FileSystemLoader +from pydantic import BaseModel, Field +from swebench import MAP_VERSION_TO_INSTALL, get_eval_refs, get_instances + +from swe.swe_bench_docker.docker_file_generator.const import ( + MAP_REPO_TO_DEB_PACKAGES, + PYENV_REPOS, + PYTHON_ENVIRONMENT_VERSIONS, +) +from swe.swe_bench_docker.docker_file_generator.utils import ( + get_environment_yml, + get_requirements, +) + + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger("build_docker") + + +class DockerfileGenerator: + def __init__( + self, + swe_bench_tasks: str, + namespace: str = "aorwall", + docker_dir: str = "docker", + predictions_path: str = None, + is_testbed: bool = False, + ): + self.namespace = namespace + self.docker_dir = docker_dir + self.task_instances = list(get_eval_refs(swe_bench_tasks).values()) + + self.image_prefix = "swe-bench" + + self.dockerfiles_to_build = [ + ( + "docker/Dockerfile", + f"{self.namespace}/{self.image_prefix}-conda:bookworm-slim", + ), + ( + "docker/pyenv/Dockerfile", + f"{self.namespace}/{self.image_prefix}-pyenv:bookworm-slim", + ), + ( + "docker/pyenv/Dockerfile-pyenvs", + f"{self.namespace}/{self.image_prefix}-pyenvs:bookworm-slim", + ), + ] + + env = Environment(loader=FileSystemLoader("../templates")) + # self.conda_instance_template = env.get_template(f"Dockerfile.conda_instance") + # self.pyenv_instance_template = 
env.get_template(f"Dockerfile.pyenv_instance") + self.conda_testbed_template = env.get_template("Dockerfile.swe") + self.pyenv_testbed_template = env.get_template("Dockerfile.swe") + self.conda_repository_template = env.get_template("Dockerfile.conda_repository") + self.pyenv_repository_template = env.get_template("Dockerfile.pyenv_repository") + self.instance_template = env.get_template("Dockerfile.pyenv_instance") + script_dir = os.path.join(os.path.dirname(__file__), '../templates') + self.getconda_path = os.path.join(script_dir, 'getconda.sh') + + if predictions_path: + predictions = get_instances(predictions_path) + self.instance_ids = set([p["instance_id"] for p in predictions]) + logger.info(f"Found {len(self.instance_ids)} in predictions file") + else: + self.instance_ids = None + + def generate(self): + testbeds = set() + task_instances_grouped = self.group_task_instances(self.task_instances) + + for repo, map_version_to_instances in task_instances_grouped.items(): + logger.info(f"Repo {repo}: {len(map_version_to_instances)} versions") + + # Determine instances to use for environment installation + for version, instances in map_version_to_instances.items(): + if self.instance_ids: + instances = [ + instance + for instance in instances + if instance["instance_id"] in self.instance_ids + ] + if not instances: + logger.info(f"No instances for {repo} {version}") + continue + + logger.info(f"\tVersion {version}: {len(instances)} instances") + + repo_name = _repo_name(repo) + + specifications = MAP_VERSION_TO_INSTALL[repo][version] + + use_conda = repo not in PYENV_REPOS + + if repo_name not in testbeds: + deb_packages = None + if repo in MAP_REPO_TO_DEB_PACKAGES: + deb_packages = MAP_REPO_TO_DEB_PACKAGES[repo] + + # if use_conda: + # self.generate_conda_repository_dockerfile(repo, deb_packages) + # else: + # self.generate_pyenv_repository_dockerfile(repo, deb_packages) + + testbeds.add(repo_name) + + self.generate_testbed_dockerfile( + repo=repo, + 
version=version, + setup_ref_instance=instances[0], + specifications=specifications, + use_conda=use_conda, + ) + for each_instance in instances: + if ( + "instance_image" in specifications + and specifications["instance_image"] + ): + for instance in instances: + install_cmd = specifications["install"] + self.generate_instance_dockerfile( + instance=instance, + install_cmd=install_cmd, + ) + + self.create_makefile() + self.generate_docker_compose() + + for dockerfile, image_name in self.dockerfiles_to_build: + print(f"docker build -t {image_name} -f {dockerfile} .") + + def create_makefile(self): + with open("Makefile", "w") as f: + f.write("all:\n") + for dockerfile, image_name in self.dockerfiles_to_build: + f.write(f"\tdocker build -t {image_name} -f {dockerfile} .\n") + + def group_task_instances(self, task_instances): + task_instances_grouped = {} + for instance in task_instances: + # Group task instances by repo, version + repo = instance["repo"] + version = instance["version"] if "version" in instance else None + if repo not in task_instances_grouped: + task_instances_grouped[repo] = {} + if version not in task_instances_grouped[repo]: + task_instances_grouped[repo][version] = [] + task_instances_grouped[repo][version].append(instance) + + return task_instances_grouped + + def generate_conda_repository_dockerfile(self, repo: str, deb_packages: List[str]): + repo_name = _repo_name(repo) + + base_image = f"{self.namespace}/{self.image_prefix}-conda:bookworm-slim" + + dockerfile_content = self.conda_repository_template.render( + base_image=base_image, + deb_packages=" ".join(deb_packages) if deb_packages else None, + repo_name=repo_name, + ) + + repo_dir = f"{self.docker_dir}/{repo_name}" + if not os.path.exists(repo_dir): + os.makedirs(repo_dir) + + output_file = f"{repo_dir}/Dockerfile" + with open(output_file, "w") as f: + f.write(dockerfile_content) + + print(f"Dockerfile generated: {output_file}") + + repo_image_name = repo.replace("/", "_") + + 
self.dockerfiles_to_build.append( + ( + output_file, + f"{self.namespace}/{self.image_prefix}-{repo_image_name}:bookworm-slim", + ) + ) + + def generate_pyenv_repository_dockerfile(self, repo: str, deb_packages: List[str]): + repo_name = _repo_name(repo) + + base_image = f"{self.namespace}/{self.image_prefix}-pyenv:bookworm-slim" + pyenv_image = f"{self.namespace}/swe-bench-pyenvs:bookworm-slim" + + dockerfile_content = self.pyenv_repository_template.render( + base_image=base_image, + pyenv_image=pyenv_image, + deb_packages=" ".join(deb_packages) if deb_packages else None, + repo_name=repo_name, + ) + + repo_dir = f"{self.docker_dir}/{repo_name}" + if not os.path.exists(repo_dir): + os.makedirs(repo_dir) + + output_file = f"{repo_dir}/Dockerfile" + with open(output_file, "w") as f: + f.write(dockerfile_content) + + print(f"Dockerfile generated: {output_file}") + + repo_image_name = repo.replace("/", "_") + + self.dockerfiles_to_build.append( + ( + output_file, + f"{self.namespace}/{self.image_prefix}-{repo_image_name}:bookworm-slim", + ) + ) + + def generate_docker_compose(self): + import yaml + + services = {} + for dockerfile, image_name in self.dockerfiles_to_build: + service_name = image_name.split("/")[ + -1 + ] # Use the image name as the service name + services[service_name] = { + "build": {"context": ".", "dockerfile": dockerfile}, + "image": image_name, + } + + docker_compose_dict = {"version": "3.8", "services": services} + + docker_compose_path = os.path.join(self.docker_dir, "docker-compose.yml") + with open(docker_compose_path, "w") as f: + yaml.dump(docker_compose_dict, f, default_flow_style=False) + + print(f"docker-compose.yml generated at: {docker_compose_path}") + + def generate_testbed_dockerfile( + self, + repo: str, + version: str, + specifications: dict, + setup_ref_instance: dict, + use_conda: bool = False, + ): + repo_name = _repo_name(repo) + repo_image_name = repo.replace("/", "_") + + env_name = f"{repo_name}__{version}" + + test_bed_dir 
= f"{self.docker_dir}/{repo_name}/{version}" + requirements_path = f"./swe_bench" + + environment_setup_commit = setup_ref_instance.get( + "environment_setup_commit", setup_ref_instance["base_commit"] + ) + + path_to_reqs = None + path_to_env_file = None + install_cmds = [] + + testbed_dir = f"{self.docker_dir}/{repo_name}/{version}" + if not os.path.exists(testbed_dir): + os.makedirs(testbed_dir) + + pre_install_cmds = specifications.get("pre_install", None) + + pip_packages = specifications.get("pip_packages", []) + + # Create conda environment according to install instructinos + pkgs = specifications["packages"] if "packages" in specifications else "" + if pkgs == "requirements.txt": + # Create environment + conda_create_cmd = ( + f"conda create -n {env_name} python={specifications['python']} -y" + ) + + path_to_reqs = get_requirements(setup_ref_instance, save_path=test_bed_dir) + + if specifications["python"] == "3.5": + install_cmds.append( + "pip install --trusted-host pypi.python.org --trusted-host files.pythonhosted.org --trusted-host pypi.org -r requirements.txt" + ) + else: + install_cmds.append("pip install -r requirements.txt") + elif pkgs == "environment.yml": + # if not use_conda: + # raise ValueError(f"Can't create non conda docker image with environment.yml set") + + if "no_use_env" in specifications and specifications["no_use_env"]: + # Create environment from yml + path_to_env_file = get_environment_yml( + setup_ref_instance, env_name, save_path=test_bed_dir + ) + conda_create_cmd = f"conda create -c conda-forge -n {env_name} python={specifications['python']} -y" + + # Install dependencies + install_cmds.append("conda env update -f environment.yml") + else: + # Create environment from yml + path_to_env_file = get_environment_yml( + setup_ref_instance, + env_name, + save_path=test_bed_dir, + python_version=specifications["python"], + ) + + conda_create_cmd = "conda env create -f environment.yml" + elif use_conda: + conda_create_cmd = f"conda create 
-n {env_name} python={specifications['python']} {pkgs} -y" + else: + conda_create_cmd = None + pip_packages.extend(pkgs.split()) + + # Install additional packages if specified + if pip_packages: + pip_packages = " ".join(pip_packages) + install_cmds.append(f"pip install {pip_packages}") + + if "install" in specifications and ( + "instance_image" not in specifications + or not specifications["instance_image"] + ): + install_cmds.append(specifications["install"]) + + repo_name = _repo_name(repo) + + base_image = ( + f"{self.namespace}/{self.image_prefix}-{repo_image_name}:bookworm-slim" + ) + pyenv_image = f"{self.namespace}/swe-bench-pyenvs:bookworm-slim" + + python_version = specifications["python"] + if use_conda: + template = self.conda_testbed_template + else: + python_version = PYTHON_ENVIRONMENT_VERSIONS[python_version] + template = self.pyenv_testbed_template + + dockerfile_content = template.render( + base_image=base_image, + pyenv_image=pyenv_image, + docker_dir=self.docker_dir, + repo_name=repo_name, + version=version, + testbed=repo_name + "__" + version, + python_version=python_version, + conda_create_cmd=conda_create_cmd, + pre_install_cmds=pre_install_cmds, + install_cmds=install_cmds, + path_to_reqs=path_to_reqs, + environment_setup_commit=environment_setup_commit, + path_to_env_file=path_to_env_file, + getconda_script_path=self.getconda_path, + ) + + testbed_dir = f"{self.docker_dir}/{repo_name}/{version}" + if not os.path.exists(testbed_dir): + os.makedirs(testbed_dir) + + output_file = f"{testbed_dir}/Dockerfile" + with open(output_file, "w") as f: + f.write(dockerfile_content) + + print(f"Dockerfile generated: {output_file}") + + self.dockerfiles_to_build.append( + ( + output_file, + f"{self.namespace}/{self.image_prefix}-{repo_image_name}-testbed:{version}", + ) + ) + + def generate_instance_dockerfile( + self, + instance: dict, + install_cmd: str, + ): + """ + Build one Docker image per benchmark instance to not have to build the environment 
each time before testing in + repositories using Cython. + """ + repo = instance["repo"] + version = instance["version"] + repo_name = _repo_name(repo) + repo_image_name = repo.replace("/", "_") + + base_image = f"{self.namespace}/{self.image_prefix}-{repo_image_name}-testbed:{instance['version']}" + + dockerfile_content = self.instance_template.render( + base_image=base_image, + repo_name=repo_name, + install_cmd=install_cmd, + base_commit=instance["base_commit"], + ) + + instance_dir = ( + f"{self.docker_dir}/{repo_name}/{version}/{instance['instance_id']}" + ) + if not os.path.exists(instance_dir): + os.makedirs(instance_dir) + + output_file = f"{instance_dir}/Dockerfile" + with open(output_file, "w") as f: + f.write(dockerfile_content) + + print(f"Dockerfile generated: {output_file}") + + self.dockerfiles_to_build.append( + ( + output_file, + f"{self.namespace}/{self.image_prefix}-{repo_image_name}-instance:{instance['instance_id']}", + ) + ) + + +def _repo_name(repo: str) -> str: + return repo.replace("/", "__") + + +class DockerGeneratorArgs(BaseModel): + swe_bench_tasks_path: str = Field( + ..., description="Path to candidate task instances file" + ) + namespace: str = Field(..., description="Docker repository namespace") + prediction_path: str = Field(..., description="Path to predictions file") + docker_dir: str = Field(..., description="Path to docker directory") + is_testbed: bool = Field( + default=False, description="if dockerfile needs to be genrated for testbed" + ) + + +if __name__ == "__main__": + args = DockerGeneratorArgs( + swe_bench_tasks_path="princeton-nlp/SWE-bench_Lite", + namespace="techcomposio", + prediction_path="", + docker_dir="/home/shubhra/work/composio/composio_sdk/python/swe/swe_bench_docker/docker/", + ) + generator = DockerfileGenerator( + args.swe_bench_tasks_path, + args.namespace, + args.docker_dir, + args.prediction_path, + args.is_testbed, + ) + generator.generate() diff --git 
a/python/swe/swe_bench_docker/docker_file_generator/evaluate_instance.py b/python/swe/swe_bench_docker/docker_file_generator/evaluate_instance.py new file mode 100644 index 0000000000..4788d59ebc --- /dev/null +++ b/python/swe/swe_bench_docker/docker_file_generator/evaluate_instance.py @@ -0,0 +1,129 @@ +import base64 +import json +import logging +import os +import sys + +from swe.swe_bench_docker.docker_file_generator.const import KEY_PREDICTION, PatchType +from swe.swe_bench_docker.docker_file_generator.context_manager import ( + TaskEnvContextManager, +) +from swe.swe_bench_docker.docker_file_generator.utils import extract_minimal_patch + + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" +) +logging.getLogger().setLevel(logging.INFO) +logger = logging.getLogger("evaluate_instance") + + +def main( + task_instance: dict, + testbed_name: str, + repo_dir: str, + log_dir: str, + timeout: int, + log_suffix: str = None, + image_type: str = "conda", +): + logger.info( + "Instance ID: " + + task_instance["instance_id"] + + "\nTestbed: " + + testbed_name + + "\nLog dir: " + + log_dir + ) + + with TaskEnvContextManager( + task_instance, + testbed_name, + repo_dir, + log_dir, + timeout=timeout, + log_suffix=log_suffix, + image_type=image_type, + ) as tcm: + # Attempt to apply prediction + patch_type = PatchType.PATCH_PRED_TRY.value + + # If prediction patch doesn't apply, try to do some minor patch refactoring and try again + if ( + not tcm.apply_patch(task_instance[KEY_PREDICTION], patch_type=patch_type) + and task_instance[KEY_PREDICTION] is not None + and task_instance[KEY_PREDICTION] != "" + ): + task_instance[KEY_PREDICTION] = extract_minimal_patch( + task_instance[KEY_PREDICTION] + ) + patch_type = PatchType.PATCH_PRED_MINIMAL_TRY.value + if not tcm.apply_patch( + task_instance[KEY_PREDICTION], patch_type=patch_type + ): + logger.warning("Failed to apply prediction patch") + sys.exit(1) + + tcm.apply_patch( + 
task_instance[KEY_PREDICTION], patch_type=patch_type, revert=True + ) + + # Set prediction patch label based on whether patch was edited + if patch_type == PatchType.PATCH_PRED_MINIMAL_TRY.value: + patch_type = PatchType.PATCH_PRED_MINIMAL.value + else: + patch_type = PatchType.PATCH_PRED.value + + # Run testing script + prediction_patch = task_instance[KEY_PREDICTION] + test_patch = task_instance["test_patch"] + if ( + ( + prediction_patch + and not tcm.apply_patch(prediction_patch, patch_type=patch_type) + ) + or ( + test_patch + and not tcm.apply_patch( + test_patch, patch_type=PatchType.PATCH_TEST.value + ) + ) + or not tcm.run_tests_task(task_instance) + ): + logger.warning("Evaluation failed") + sys.exit(1) + + logger.info("Evaluation succeeded") + + +if __name__ == "__main__": + TASK_INSTANCE_JSON = "/home/swe-bench/task_instance.json" + if os.path.exists(TASK_INSTANCE_JSON): + with open(TASK_INSTANCE_JSON, "r") as f: + task_instance = json.load(f) + else: + assert ( + os.getenv("INSTANCE") is not None + ), "INSTANCE environment variable is not set" + task_instance = json.loads( + base64.b64decode(os.getenv("INSTANCE")).decode("utf-8") + ) + assert os.getenv("LOG_DIR") is not None, "LOG_DIR environment variable is not set" + assert ( + os.getenv("TESTBED_NAME") is not None + ), "TESTBED_NAME environment variable is not set" + + repo_dir = os.getenv("REPO_DIR") + if not repo_dir: + repo_dir = os.getenv("TESTBED") + + assert repo_dir, "REPO_DIR environment variable is not set" + main( + task_instance=task_instance, + testbed_name=os.getenv("TESTBED_NAME"), + repo_dir=repo_dir, + log_dir=os.getenv("LOG_DIR"), + timeout=int(os.getenv("TIMEOUT")) if os.getenv("TIMEOUT") is not None else None, + log_suffix=os.getenv("LOG_SUFFIX"), + image_type=os.getenv("IMAGE_TYPE", "conda"), + ) diff --git a/python/swe/swe_bench_docker/docker_file_generator/run_docker.py b/python/swe/swe_bench_docker/docker_file_generator/run_docker.py new file mode 100644 index 
0000000000..73c05dd813 --- /dev/null +++ b/python/swe/swe_bench_docker/docker_file_generator/run_docker.py @@ -0,0 +1,161 @@ +# pylint: disable=W1203 + +import asyncio +import base64 +import json +import logging +import os +import subprocess +import tempfile +import time + +from swe.swe_bench_docker.docker_file_generator.const import MAP_VERSION_TO_INSTALL + + +logger = logging.getLogger(__name__) + + +async def run_docker_container( + task_instance: dict, + namespace: str, + log_dir: str, + timeout: int = 900, + log_suffix: str = "", + verbose: bool = False, +): + repo_name = task_instance["repo"].replace("/", "_") + + specifications = MAP_VERSION_TO_INSTALL[task_instance["repo"]][ + task_instance["version"] + ] + image_prefix = "swe-bench" + + # TODO: Change this when deciding + if "packages" in specifications and specifications["packages"] == "environment.yml": + container_log_dir = "/home/swe-bench/logs" + else: + container_log_dir = "/opt/logs" + + if specifications.get("instance_image", False): + docker_image = f"{namespace}/{image_prefix}-{repo_name}-instance:{task_instance['instance_id']}" + else: + docker_image = ( + f"{namespace}/{image_prefix}-{repo_name}-testbed:{task_instance['version']}" + ) + + swebench_docker_fork_dir = os.environ.get("SWEBENCH_DOCKER_FORK_DIR") + + if swebench_docker_fork_dir: + # Create a temporary file to store the task_instance JSON + tmpfile_path = tempfile.mktemp(suffix=".json") + with open(tmpfile_path, "w+", encoding="utf-8") as f: + json.dump(task_instance, f) + + docker_command = [ + "docker", + "run", + "--rm", + "-u", + "root", + "-v", + f"{log_dir}:{container_log_dir}", + # Map the swebench_docker fork dir to the container + # for some reason, swebench_docker has different locations for the different containers :( + # so we need to map all of them to make it work + "-v", + f"{swebench_docker_fork_dir}/swebench_docker:/opt/swebench_docker:ro", + "-v", + 
f"{swebench_docker_fork_dir}/swebench_docker:/home/swe-bench/swebench_docker:ro", + "-v", + f"{swebench_docker_fork_dir}/swebench_docker:/home/swe-bench/swebench:ro", + # ======= + # Map file instead pass the instance as env var to avoid "Argument list too long" error + "-v", + f"{tmpfile_path}:/home/swe-bench/task_instance.json:ro", + "-e", + f"LOG_DIR={container_log_dir}", + "-e", + f"TIMEOUT={timeout}", + "-e", + f"LOG_SUFFIX={log_suffix}", + docker_image, + ] + else: + # Base64 encode the instance JSON to be sure it can be passed as an environment variable + instance_b64 = base64.b64encode( + json.dumps(task_instance).encode("utf-8") + ).decode("utf-8") + docker_command = [ + "docker", + "run", + "--rm", + "-v", + f"{log_dir}:{container_log_dir}", + "-e", + f"INSTANCE={instance_b64}", + "-e", + f"LOG_DIR={container_log_dir}", + "-e", + f"TIMEOUT={timeout}", + "-e", + f"LOG_SUFFIX={log_suffix}", + docker_image, + ] + + cmd_string = " ".join(docker_command) + + if verbose: + logger.info(cmd_string) + + process = await asyncio.create_subprocess_shell( + cmd_string, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) + stdout, stderr = await process.communicate() + stdout = stdout.decode() + if stderr: + stderr = stderr.decode() + + if swebench_docker_fork_dir: + os.unlink(tmpfile_path) + + return process.returncode, stdout, stderr + + +async def run_docker_evaluation( + task_instance: dict, + namespace: str, + log_dir: str, + timeout: int = 900, + log_suffix: str = "", + verbose: bool = False, +): + start_time = time.time() + docker_image = f"{namespace}/swe-bench-{task_instance['repo'].replace('/', '_')}-testbed:{task_instance['version']}" + + try: + returncode, stdout, stderr = await run_docker_container( + task_instance, namespace, log_dir, timeout, log_suffix, verbose + ) + elapsed_time = time.time() - start_time + + if returncode != 0: + logger.warning( + f"[{task_instance['instance_id']}][{docker_image}] Error running container:" + ) + 
logger.warning(f"Stdout - {stdout}") + logger.warning(f"Stderr - {stderr}") + elif "Evaluation succeeded" not in stdout: + logger.warning( + f"[{task_instance['instance_id']}][{docker_image}] \ + Container ran successfully in {elapsed_time} seconds, but evaluation failed." + ) + logger.warning(f"stdout - {stdout}") + else: + logger.info( + f"[{task_instance['instance_id']}][{docker_image}] \ + Container ran successfully in {elapsed_time} seconds." + ) + except Exception as e: + logger.warning( + f"[{task_instance['instance_id']}][{docker_image}] Error running container: {e}" + ) diff --git a/python/swe/swe_bench_docker/docker_file_generator/utils.py b/python/swe/swe_bench_docker/docker_file_generator/utils.py new file mode 100644 index 0000000000..12de9adbb4 --- /dev/null +++ b/python/swe/swe_bench_docker/docker_file_generator/utils.py @@ -0,0 +1,402 @@ +# flake8: noqa E203 + +import json +import os +import re +from datetime import datetime + +from swe.swe_bench_docker.docker_file_generator.const import ( + MAP_REPO_TO_ENV_YML_PATHS, + MAP_REPO_TO_REQS_PATHS, + NON_TEST_EXTS, + SWE_BENCH_URL_RAW, +) + + +def get_environment_yml( + instance: dict, + env_name: str, + save_path: str = None, + python_version: str = None, +) -> str: + """ + Get environment.yml for given task instance + + Args: + instance (dict): SWE Bench Task instance + env_name (str): Rename retrieved environment.yml to this name + save_path (str): If provided, save environment.yml to this path + Returns: + environment.yml (str): If save_path given, returns path to saved environment.yml. 
+ Otherwise, returns environment.yml as string + """ + import requests + + # Attempt to find environment.yml at each path based on task instance's repo + path_worked = False + + commit = ( + "environment_setup_commit" + if "environment_setup_commit" in instance + else "base_commit" + ) + for req_path in MAP_REPO_TO_ENV_YML_PATHS[instance["repo"]]: + reqs_url = os.path.join( + SWE_BENCH_URL_RAW, instance["repo"], instance[commit], req_path + ) + reqs = requests.get(reqs_url) + if reqs.status_code == 200: + path_worked = True + break + if not path_worked: + print( + f"Could not find environment.yml at paths {MAP_REPO_TO_ENV_YML_PATHS[instance['repo']]}" + ) + return None + + lines = reqs.text.split("\n") + cleaned = [] + for line in lines: + # Rename environment to given name + if line.startswith("name:"): + cleaned.append(f"name: {env_name}") + continue + if line.startswith("dependencies:"): + cleaned.append(line) + if python_version is not None: + cleaned.append(f" - python={python_version}") + continue + cleaned.append(line) + + # Return environment.yml as string if no save path given + if save_path is None: + return "\n".join(cleaned) + + # Save environment.yml to given path and return path + path_to_reqs = os.path.join(save_path, "environment.yml") + with open(path_to_reqs, "w") as f: + f.write("\n".join(cleaned)) + return path_to_reqs + + +def get_instances(instance_path: str) -> list: + """ + Get task instances from given path + + Args: + instance_path (str): Path to task instances + Returns: + task_instances (list): List of task instances + """ + if any([instance_path.endswith(x) for x in [".jsonl", ".jsonl.all"]]): + task_instances = list() + with open(instance_path) as f: + for line in f.readlines(): + task_instances.append(json.loads(line)) + return task_instances + + with open(instance_path) as f: + task_instances = json.load(f) + return task_instances + + +def get_requirements(instance: dict, save_path: str = None): + """ + Get requirements.txt for given 
task instance + + Args: + instance (dict): task instance + save_path (str): If provided, save requirements.txt to this path + Returns: + requirements.txt (str): If save_path given, returns path to saved requirements.txt. + Otherwise, returns requirements.txt as string + """ + import requests + + # Attempt to find requirements.txt at each path based on task instance's repo + path_worked = False + commit = ( + "environment_setup_commit" + if "environment_setup_commit" in instance + else "base_commit" + ) + + for req_path in MAP_REPO_TO_REQS_PATHS[instance["repo"]]: + reqs_url = os.path.join( + SWE_BENCH_URL_RAW, instance["repo"], instance[commit], req_path + ) + + reqs = requests.get(reqs_url) + if reqs.status_code == 200: + path_worked = True + break + if not path_worked: + print( + f"Could not find requirements.txt at paths {MAP_REPO_TO_REQS_PATHS[instance['repo']]}" + ) + return None + + lines = reqs.text + original_req = [] + additional_reqs = [] + req_dir = "/".join(req_path.split("/")[:-1]) + exclude_line = lambda line: any( + [line.strip().startswith(x) for x in ["-e .", "#", ".[test"]] + ) + + for line in lines.split("\n"): + line = line.replace("sys.platform", "sys_platform") # Django specific? 
+ + if line.strip().startswith("-r"): + # Handle recursive requirements + file_name = line[len("-r") :].strip() + reqs_url = os.path.join( + SWE_BENCH_URL_RAW, + instance["repo"], + instance[commit], + req_dir, + file_name, + ) + reqs = requests.get(reqs_url) + if reqs.status_code == 200: + for line_extra in reqs.text.split("\n"): + if not exclude_line(line_extra): + additional_reqs.append(line_extra) + else: + if not exclude_line(line): + original_req.append(line) + + # Combine all requirements into single text body + additional_reqs.append("\n".join(original_req)) + all_reqs = "\n".join(additional_reqs) + + if save_path is None: + return all_reqs + + path_to_reqs = os.path.join(save_path, "requirements.txt") + with open(path_to_reqs, "w") as f: + f.write(all_reqs) + return path_to_reqs + + +def get_test_directives(instance: dict) -> list: + """ + Get test directives from the test_patch of a task instance + + Args: + instance (dict): task instance + Returns: + directives (list): List of test directives + """ + # For seq2seq code repos, testing command is fixed + if instance["repo"] == "swe-bench/humaneval": + return ["test.py"] + + # Get test directives from test patch and remove non-test files + diff_pat = r"diff --git a/.* b/(.*)" + test_patch = instance["test_patch"] + directives = re.findall(diff_pat, test_patch) + directives = [ + d for d in directives if not any(d.endswith(ext) for ext in NON_TEST_EXTS) + ] + + # For Django tests, remove extension + "tests/" prefix and convert slashes to dots (module referencing) + if instance["repo"] == "django/django": + directives_transformed = [] + for d in directives: + d = d[: -len(".py")] if d.endswith(".py") else d + d = d[len("tests/") :] if d.startswith("tests/") else d + d = d.replace("/", ".") + directives_transformed.append(d) + directives = directives_transformed + + return directives + + +def split_instances(input_list: list, n: int) -> list: + """ + Split a list into n approximately equal length sublists + + 
Args: + input_list (list): List to split + n (int): Number of sublists to split into + Returns: + result (list): List of sublists + """ + avg_length = len(input_list) // n + remainder = len(input_list) % n + result, start = [], 0 + + for i in range(n): + length = avg_length + 1 if i < remainder else avg_length + sublist = input_list[start : start + length] + result.append(sublist) + start += length + + return result + + +def find_python_by_date(target_date, date_format="%Y%m%d"): + """ + Find python version closest to given date + + Args: + target_date (str): Date to find python version for + date_format (str): Format of target_date + Returns: + python_version (str): Python version closest to target_date + """ + import requests + + # Make web request to versions + date page + url = "https://www.python.org/doc/versions/" + response = requests.get(url) + + # Look for all matches + pattern = r"Python (.*)</a>, documentation released on (.*)\.</li>" + matches = re.findall(pattern, response.text) + + # NOTE(review): the statements below were garbled in extraction; reconstructed from upstream SWE-bench utils — verify against the original + # Parse each release date and sort most-recent first + releases = [(m[0], datetime.strptime(m[1], "%d %B %Y")) for m in matches] + releases = sorted(releases, key=lambda x: x[1], reverse=True) + + # Return the most recent version released on or before target_date + target_date = datetime.strptime(target_date, date_format) + for x in releases: + if target_date >= x[1]: + return x[0] + return None + + +class DotDict: + """ + Wrapper class for accessing dictionary keys as attributes + """ + + def __init__(self, data): + self.data = data + + def __getattr__(self, key): + return self.data.get(key) + + +# MARK - Patch Correction +PATCH_PATTERN = re.compile( + r"(?:diff[\w\_\.\ \/\-]+\n)?\-\-\-\s+a\/(?:.*?)\n\+\+\+\s+b\/(?:.*?)(?=diff\ |\-\-\-\ a\/|\Z)", + re.DOTALL, +) +PATCH_FILE_PATTERN = re.compile(r"\-\-\-\s+a\/(?:.+)\n\+\+\+\s+b\/(?:.+)") +PATCH_HUNK_PATTERN = re.compile( + r"\@\@\s+\-(\d+),(\d+)\s+\+(\d+),(\d+)\s+\@\@(.+?)(?=diff\ |\-\-\-\ a\/|\@\@\ \-|\Z)", + re.DOTALL, +) + + +def get_first_idx(charlist): + """Get index of first occurrence of "-" or "+" in charlist""" + first_min = charlist.index("-") if "-" in charlist else len(charlist) + first_plus = charlist.index("+") if "+" in charlist else len(charlist) + return min(first_min, first_plus) + + +def get_last_idx(charlist): + """Get index of last occurrence of "-" or "+" in charlist""" + char_idx = get_first_idx(charlist[::-1]) + 
last_idx = len(charlist) - char_idx + return last_idx + 1 + + +def strip_content(hunk): + """Remove trailing non +/- lines and trailing whitespace per line per hunk""" + first_chars = list(map(lambda x: None if not len(x) else x[0], hunk.split("\n"))) + first_idx = get_first_idx(first_chars) + last_idx = get_last_idx(first_chars) + new_lines = list(map(lambda x: x.rstrip(), hunk.split("\n")[first_idx:last_idx])) + new_hunk = "\n" + "\n".join(new_lines) + "\n" + return new_hunk, first_idx - 1 + + +def get_hunk_stats(pre_start, pre_len, post_start, post_len, hunk, total_delta): + """Recalculate hunk start/end position and diff delta""" + stats = {"context": 0, "added": 0, "subtracted": 0} + hunk = hunk.split("\n", 1)[-1].strip("\n") + for line in hunk.split("\n"): + if line.startswith("-"): + stats["subtracted"] += 1 + elif line.startswith("+"): + stats["added"] += 1 + else: + stats["context"] += 1 + context = stats["context"] + added = stats["added"] + subtracted = stats["subtracted"] + pre_len = context + subtracted + post_start = pre_start + total_delta + post_len = context + added + total_delta = total_delta + (post_len - pre_len) + return pre_start, pre_len, post_start, post_len, total_delta + + +def extract_minimal_patch(model_patch): + """ + Wrapper function that takes hunk and + * Removes trailing non +/- lines and trailing whitespace per line per hunk + * Recalculates hunk start/end position and diff delta + * Returns new patch + """ + model_patch = model_patch.lstrip("\n") + new_patch = "" + for patch in PATCH_PATTERN.findall(model_patch): + total_delta = 0 + patch_header = PATCH_FILE_PATTERN.findall(patch)[0] + if patch_header: + new_patch += patch_header + "\n" + for hunk in PATCH_HUNK_PATTERN.findall(patch): + pre_start, pre_len, post_start, post_len, content = hunk + pre_start, pre_len, post_start, post_len, content = list( + map(lambda x: int(x) if x.isnumeric() else x, hunk) + ) + content, adjust_pre_start = strip_content(content) + pre_start += 
adjust_pre_start + pre_start, pre_len, post_start, post_len, total_delta = get_hunk_stats( + pre_start, pre_len, post_start, post_len, content, total_delta + ) + new_patch += ( + f"@@ -{pre_start},{pre_len} +{post_start},{post_len} @@{content}" + ) + return new_patch + + +def has_attribute_or_import_error(log_before): + """ + Check to see if Attribute/Import-prefix is in log text + + Args: + log_before (str): Validation log text before patch application + """ + log_before = log_before.lower() + + if any([x in log_before for x in ["attribute", "import"]]): + + def get_lines_with_word(text, target_word): + # Function to extract line(s) that contains target_word + text, target_word = text.lower(), target_word.lower() + lines, hits = text.split("\n")[::-1], [] + for line in lines: + if target_word in line: + hits.append(line) + return hits + + # Get line with Attribute/Import error + lines_1 = get_lines_with_word(log_before, "attribute") + lines_2 = get_lines_with_word(log_before, "import") + lines_1 = " ".join(lines_1) + lines_2 = " ".join(lines_2) + + if any([(x in lines_1 or x in lines_2) for x in ["error", "fail"]]): + return True + return False diff --git a/python/swe/swe_bench_docker/evaulate_on_docker.py b/python/swe/swe_bench_docker/evaulate_on_docker.py new file mode 100644 index 0000000000..afdaf2e7d3 --- /dev/null +++ b/python/swe/swe_bench_docker/evaulate_on_docker.py @@ -0,0 +1,211 @@ +# pylint: disable=W1203, W1201, R1729 + +import asyncio +import hashlib +import logging +import os + +from pydantic import BaseModel, Field +from swebench import get_eval_refs + +from swe.swe_bench_docker.docker_file_generator.const import ( + KEY_INSTANCE_ID, + KEY_MODEL, + KEY_PREDICTION, + MAP_REPO_TO_TEST_FRAMEWORK, +) +from swe.swe_bench_docker.docker_file_generator.run_docker import run_docker_evaluation +from swe.swe_bench_docker.docker_file_generator.utils import ( + get_instances, + get_test_directives, +) + + +logging.basicConfig( + level=logging.INFO, 
format="%(asctime)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger("run_evaluation") + + +def deterministic_hash(input_string: str, length: int = None): + input_bytes = input_string.encode("utf-8") + sha256_hash = hashlib.sha256(input_bytes) + hex_digest = sha256_hash.hexdigest() + if length is None: + return hex_digest + return hex_digest[:length] + + +def validate_predictions(predictions_path, tasks_ids): + # Check that predictions file exists + if not any([predictions_path.endswith(x) for x in [".json", ".jsonl"]]): + raise ValueError("Predictions path must be .json or .jsonl file") + predictions = get_instances(predictions_path) + not_in_tasks = [] + # Check that predictions are correctly formatted + for pred in predictions: + if any([x not in pred for x in [KEY_INSTANCE_ID, KEY_MODEL, KEY_PREDICTION]]): + raise ValueError( + f"Every prediction must have {KEY_INSTANCE_ID}, {KEY_MODEL}, and {KEY_PREDICTION} fields" + ) + if pred[KEY_INSTANCE_ID] not in tasks_ids: + not_in_tasks.append(pred[KEY_INSTANCE_ID]) + # Check that instance IDs specified by predictions exist + if len(not_in_tasks) > 0: + logger.warning( + "Predictions for the following instance_ids were not " + + "found in the tasks file and will not be considered: " + + ", ".join(not_in_tasks) + ) + + +async def run_docker_throttled( + task_instance, namespace, log_dir, timeout, log_suffix, sem +): + async with sem: + return await run_docker_evaluation( + task_instance, namespace, log_dir, timeout, log_suffix + ) + + +async def evaluate( + predictions_path: str, + swe_bench_tasks: str, + namespace: str, + log_dir: str, + log_suffix: str = "", + skip_existing: bool = False, + timeout: int = 900, + num_processes: int = -1, +): + """ + Runs evaluation on predictions for each model/repo/version combination. + + Args: + predictions_path (str): Path to the predictions file. + swe_bench_tasks (str): Path to the SWE-bench tasks file OR HF dataset name. 
+ namespace (str): Docker repository namespace. + log_dir (str): Path to the directory where logs will be saved. + log_suffix (str): Suffix to append to log file names. + skip_existing (bool): Whether to skip evaluations for predictions that already have logs. + timeout (int): Timeout for each evaluation. + num_processes (int): Number of processes to run in parallel (-1 = unlimited) + + Raises: + ValueError: If log_dir is not a directory, testbed is not a directory, or swe_bench_tasks does not exist. + """ + # Validate arguments + if not os.path.exists(log_dir) or not os.path.isdir(log_dir): + raise ValueError("--log_dir must exist and point at a directory") + + tasks = list(get_eval_refs(swe_bench_tasks).values()) + + # Verify arguments are formatted correctly + if not isinstance(tasks, list): + raise ValueError(f"{swe_bench_tasks} must contain an array of tasks") + tasks_map = {t[KEY_INSTANCE_ID]: t for t in tasks} + predictions_path = os.path.abspath(predictions_path) + validate_predictions(predictions_path, [t[KEY_INSTANCE_ID] for t in tasks]) + + predictions = get_instances(predictions_path) + + if len(predictions) == 0: + logger.info("No predictions to evaluate") + return + + # Remove predictions that have already been evaluated + if skip_existing: + # Skip logs that already exist + predictions_filtered = [] + for p in predictions: + log_file_name = f"{p[KEY_INSTANCE_ID]}.{p[KEY_MODEL]}.eval.log" + if log_suffix: + log_file_name = ( + f"{p[KEY_INSTANCE_ID]}.{p[KEY_MODEL]}.{log_suffix}.eval.log" + ) + log_file = os.path.join(log_dir, log_file_name) + if not os.path.exists(log_file): + predictions_filtered.append(p) + if len(predictions_filtered) == 0: + logger.info("All predictions already exist, skipping") + return + logger.info( + f"# of predictions to evaluate: {len(predictions_filtered)} " + + f"({len(predictions) - len(predictions_filtered)} already evaluated)" + ) + predictions = predictions_filtered + else: + logger.info(f"# of predictions to evaluate: 
{len(predictions)}") + + task_instances = [] + + # Set the relevant data on task_instances + for prediction in predictions: + task = tasks_map[prediction[KEY_INSTANCE_ID]] + + test_type = MAP_REPO_TO_TEST_FRAMEWORK[task["repo"]] + test_directives = get_test_directives(task) + test_cmd = f"{test_type} {' '.join(test_directives)}" + + task_instances.append( + { + "repo": task["repo"], + "version": task["version"], + "base_commit": task["base_commit"], + KEY_INSTANCE_ID: prediction[KEY_INSTANCE_ID], + KEY_MODEL: prediction[KEY_MODEL], + KEY_PREDICTION: prediction[KEY_PREDICTION], + "test_patch": task["test_patch"], + "test_directives": test_directives, + "test_cmd": test_cmd, + } + ) + + task_instances = sorted(task_instances, key=lambda x: x[KEY_INSTANCE_ID]) + + sem = asyncio.Semaphore(num_processes if num_processes > 0 else len(task_instances)) + tasks = [] + for task_instance in task_instances: + if task_instance[KEY_PREDICTION]: + tasks.append( + run_docker_throttled( + task_instance, namespace, log_dir, timeout, log_suffix, sem + ) + ) + else: + logger.info(f"[{task_instance[KEY_INSTANCE_ID]}] No prediction found.") + + await asyncio.gather(*tasks) + + +class EvaluateOnDockerArgs(BaseModel): + predictions_path: str = Field(..., description="Path to predictions file") + log_dir: str = Field(..., description="Path to log directory") + swe_bench_tasks: str = Field( + ..., description="Path to dataset file or HF datasets name" + ) + namespace: str = Field(default="aorwall", description="Docker repository namespace") + log_suffix: str = Field( + default="", description="(Optional) Suffix to append to log file names" + ) + skip_existing: bool = Field( + default=False, description="(Optional) Skip existing logs" + ) + timeout: int = Field( + default=1800, description="(Optional) Timeout in seconds (default: 900)" + ) + num_processes: int = Field( + default=-1, + description="(Optional) Number of processes to run in parallel (-1 for unlimited)", + ) + + +if __name__ == 
"__main__": + args = EvaluateOnDockerArgs( + predictions_path=os.path.expanduser("~/.composio_coder/logs/patches.json"), + skip_existing=False, + swe_bench_tasks="princeton-nlp/SWE-bench_Lite", + namespace="aorwall", + log_dir=os.path.expanduser("~/.composio_coder/logs/logs/"), + ) + asyncio.run(evaluate(**args.dict())) diff --git a/python/swe/swe_bench_docker/scripts/build_docker_images.sh b/python/swe/swe_bench_docker/scripts/build_docker_images.sh new file mode 100755 index 0000000000..16bced677b --- /dev/null +++ b/python/swe/swe_bench_docker/scripts/build_docker_images.sh @@ -0,0 +1,96 @@ +#!/bin/bash +set -e + +# Check for minimum arguments +if [ "$#" -lt 2 ]; then + echo "Usage: $0 <root_directory> <docker_namespace> [repo_directory]" + exit 1 +fi + +docker_namespace=$2 +root_directory=$1 +repo=${3:-""} + +base_image="${docker_namespace}/swe-bench" + +echo "Building base Docker images..." + +build_docker_images() { + if [ -z "$repo" ]; then + # Build testbed base images in the root level directories first + for dir in $root_directory/*/; do + dockerfile_path="$dir/Dockerfile" + if [ -f "$dockerfile_path" ]; then + tag="${dir#$root_directory/}" + tag="${tag%/}" + image_name="$base_image-$(echo $tag | sed 's/__*/_/g')" + echo "Building Docker image: $image_name" + docker build -t "$image_name:bookworm-slim" -f "$dockerfile_path" . + fi + done + else + # Build specific repo directory + dir="$root_directory/$repo/" + if [ -d "$dir" ] && [ -f "$dir/Dockerfile" ]; then + tag="${repo%/}" + image_name="$base_image-$(echo $tag | sed 's/__*/_/g')" + echo "Building Docker image: $image_name" + docker build -t "$image_name:bookworm-slim" -f "$dir/Dockerfile" . + fi + fi + + if [ ! 
-z "$repo" ]; then + # Specific repo's versioned directories + for dir in $root_directory/$repo/*; do + build_versioned_images "$dir" + done + else + # All repos' versioned directories + for base_dir in $root_directory/*/; do + for dir in $base_dir/*; do + build_versioned_images "$dir" + done + done + fi +} + +build_versioned_images() { + dir=$1 + if [ -d "$dir" ] && [[ "$dir" =~ .*/[0-9]+\.[0-9]+$ ]]; then + dockerfile_path="$dir/Dockerfile" + if [ -f "$dockerfile_path" ]; then + base_dir=$(dirname "$dir") + version=$(basename "$dir") + tag_base="${base_dir#$root_directory/}" + tag_base="${tag_base%/*}" + tag_base="$(echo $tag_base | sed 's/__*/_/g')" + image_name="$base_image-${tag_base}-swe" + echo "Building Docker image: $image_name:$version for $dir/Dockerfile" + docker build -t "$image_name:$version" -f "$dockerfile_path" . + + for instance_dir in $dir/*/; do + build_instance_image "$instance_dir" "$tag_base" + done + fi + + fi +} + +build_instance_image() { + dir=$1 + tag_base=$2 + + dockerfile_path="$dir/Dockerfile" + if [ -f "$dockerfile_path" ]; then + base_dir=$(dirname "$dir") + instance_id=$(basename "$dir") + tag_base="${base_dir#$root_directory/}" + tag_base="${tag_base%/*}" + tag_base="$(echo $tag_base | sed 's/__*/_/g')" + image_name="$base_image-${tag_base}-instance" + echo "Building Docker image: $image_name:$instance_id for $dir/Dockerfile" + docker build -t "$image_name:$instance_id" -f "$dockerfile_path" . 
+ fi +} + +build_docker_images diff --git a/python/swe/swe_bench_docker/scripts/docker_push.sh b/python/swe/swe_bench_docker/scripts/docker_push.sh new file mode 100755 index 0000000000..8456f8ca8d --- /dev/null +++ b/python/swe/swe_bench_docker/scripts/docker_push.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# Usage check +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <root_directory> <docker_namespace>" + exit 1 +fi + +root_directory=$1 +docker_namespace=$2 +base_image="${docker_namespace}/swe-bench" + +push_docker_images() { + for dir in $root_directory/*/*; do + if [ -d "$dir" ] && [[ "$dir" =~ .*/[0-9]+\.[0-9]+$ ]]; then + dockerfile_path="$dir/Dockerfile" + if [ -f "$dockerfile_path" ]; then + base_dir=$(dirname "$dir") + version=$(basename "$dir") + tag_base="${base_dir#$root_directory/}" + tag_base="$(echo $tag_base | sed 's/__*/_/g')" + image_name="$base_image-${tag_base}-swe:$version" + echo "Pushing Docker image: $image_name" + docker push "$image_name" + fi + fi + done +} + +push_docker_images diff --git a/python/swe/swe_bench_docker/templates/Dockerfile.conda_repository b/python/swe/swe_bench_docker/templates/Dockerfile.conda_repository new file mode 100644 index 0000000000..a1f4f3f0f2 --- /dev/null +++ b/python/swe/swe_bench_docker/templates/Dockerfile.conda_repository @@ -0,0 +1,20 @@ +# Generated by generate_dockerfiles.py + +FROM {{ base_image }} + +{% if deb_packages %} +USER root + +RUN apt-get update && \ + apt-get install -y {{ deb_packages }} && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +USER swe-bench +{% endif %} + +RUN git clone https://github.com/swe-bench/{{ repo_name }}.git + +WORKDIR /home/swe-bench/{{ repo_name }} + +ENV REPO_DIR=/home/swe-bench/{{ repo_name }} diff --git a/python/swe/swe_bench_docker/templates/Dockerfile.conda_testbed b/python/swe/swe_bench_docker/templates/Dockerfile.conda_testbed new file mode 100644 index 0000000000..003fda2da4 --- /dev/null +++ b/python/swe/swe_bench_docker/templates/Dockerfile.conda_testbed @@ -0,0 +1,32 @@ +# Generated by 
generate_dockerfiles.py + +FROM {{ base_image }} + +RUN git -c advice.detachedHead=false checkout {{ environment_setup_commit }} + +{% if path_to_env_file -%} +COPY {{ path_to_env_file }} . +{% endif -%} + +RUN {{conda_create_cmd}} +SHELL ["conda", "run", "-n", "{{ testbed }}", "/bin/bash", "-c"] + +{% if pre_install_cmds -%} +{% for cmd in pre_install_cmds -%} +RUN {{ cmd }} +{% endfor -%} +{% endif -%} + +{% if path_to_reqs -%} +COPY {{ path_to_reqs }} . +{% endif -%} + +{% for cmd in install_cmds %} +RUN {{ cmd }} +{% endfor %} + +RUN git checkout {{ environment_setup_commit }} . + +WORKDIR / + +ENTRYPOINT ["/bin/bash"] diff --git a/python/swe/swe_bench_docker/templates/Dockerfile.pyenv_instance b/python/swe/swe_bench_docker/templates/Dockerfile.pyenv_instance new file mode 100644 index 0000000000..ae683d4f4d --- /dev/null +++ b/python/swe/swe_bench_docker/templates/Dockerfile.pyenv_instance @@ -0,0 +1,11 @@ +# Generated by generate_dockerfiles.py + +FROM {{ base_image }} + +WORKDIR /opt/{{ repo_name }} + +RUN git -c advice.detachedHead=false checkout {{ base_commit }} + +RUN {{ install_cmd }} + +WORKDIR /opt diff --git a/python/swe/swe_bench_docker/templates/Dockerfile.pyenv_repository b/python/swe/swe_bench_docker/templates/Dockerfile.pyenv_repository new file mode 100644 index 0000000000..f938ddd115 --- /dev/null +++ b/python/swe/swe_bench_docker/templates/Dockerfile.pyenv_repository @@ -0,0 +1,17 @@ +# Generated by generate_dockerfiles.py + +FROM {{ base_image }} + +COPY --from={{ pyenv_image }} /opt/pyenv/versions/3.11.9 /opt/pyenv/versions/3.11.9 + +{% if deb_packages %} +RUN apt-get update && \ + apt-get install -y {{ deb_packages }} && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* +{% endif %} + +WORKDIR /opt + +RUN git clone https://github.com/swe-bench/{{ repo_name }}.git +ENV REPO_DIR=/opt/{{ repo_name }} diff --git a/python/swe/swe_bench_docker/templates/Dockerfile.pyenv_testbed b/python/swe/swe_bench_docker/templates/Dockerfile.pyenv_testbed 
new file mode 100644 index 0000000000..2634060ba2 --- /dev/null +++ b/python/swe/swe_bench_docker/templates/Dockerfile.pyenv_testbed @@ -0,0 +1,40 @@ +# Generated by generate_dockerfiles.py + +FROM {{ base_image }} + +{% if deb_packages %} +RUN apt-get update && \ + apt-get install -y {{ deb_packages }} && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* +{% endif %} + +COPY --from={{ pyenv_image }} /opt/pyenv/versions/{{ python_version }} /opt/pyenv/versions/{{ python_version }} + +RUN pyenv rehash && \ + pyenv global {{ python_version }} && \ + pyenv versions + +WORKDIR /opt/{{ repo_name }} + +RUN git -c advice.detachedHead=false checkout {{ environment_setup_commit }} + +{% if pre_install_cmds -%} +{% for cmd in pre_install_cmds -%} +RUN {{ cmd }} +{% endfor -%} +{% endif -%} + +{% if path_to_reqs -%} +COPY {{ path_to_reqs }} . +{% endif -%} + +{% for cmd in install_cmds %} +RUN {{ cmd }} +{% endfor %} + +RUN git checkout {{ environment_setup_commit }} . + +WORKDIR / + +ENTRYPOINT ["/bin/bash"] diff --git a/python/swe/swe_bench_docker/templates/Dockerfile.swe b/python/swe/swe_bench_docker/templates/Dockerfile.swe new file mode 100644 index 0000000000..0528144988 --- /dev/null +++ b/python/swe/swe_bench_docker/templates/Dockerfile.swe @@ -0,0 +1,67 @@ +# Generated by generate_dockerfiles.py + +FROM ubuntu:jammy + +ARG TARGETARCH + +# Install third party tools +RUN apt-get update && \ + apt-get install -y bash gcc git jq wget g++ make libffi-dev build-essential python3.11&& \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Initialize git +RUN git config --global user.email "sweagent@pnlp.org" +RUN git config --global user.name "sweagent" + +# Environment variables +ENV ROOT='/dev/' +RUN prompt() { echo " > "; }; +ENV PS1="> " + +# Create file for tracking edits, test patch +RUN touch /root/files_to_edit.txt +RUN touch /root/test.patch + +# add ls file indicator +RUN echo "alias ls='ls -F'" >> /root/.bashrc + +# Setup Conda +# Install miniconda +ENV 
PATH="/root/miniconda3/bin:${PATH}" +ARG PATH="/root/miniconda3/bin:${PATH}" +COPY ./swe_bench_docker/templates/getconda.sh . +RUN bash getconda.sh ${TARGETARCH} \ + && rm getconda.sh \ + && mkdir /root/.conda \ + && bash miniconda.sh -b \ + && rm -f miniconda.sh +RUN conda --version \ + && conda init bash \ + && conda config --append channels conda-forge + + +WORKDIR /opt + +RUN git clone https://github.com/swe-bench/{{ repo_name }}.git +ENV REPO_DIR=/opt/{{ repo_name }} + +WORKDIR /opt/{{ repo_name }} + +RUN git -c advice.detachedHead=false checkout 4a72da71001f154ea60906a2f74898d32b7322a7 + +{% if pre_install_cmds -%} +{% for cmd in pre_install_cmds -%} +RUN {{ cmd }} +{% endfor -%} +{% endif -%} + +{% if path_to_reqs -%} +COPY {{ path_to_reqs }} . +{% endif -%} + +{% for cmd in install_cmds %} +RUN {{ cmd }} +{% endfor %} + +ENTRYPOINT ["/bin/bash"] \ No newline at end of file diff --git a/python/swe/swe_bench_docker/templates/getconda.sh b/python/swe/swe_bench_docker/templates/getconda.sh new file mode 100644 index 0000000000..4678569770 --- /dev/null +++ b/python/swe/swe_bench_docker/templates/getconda.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +# Helper script to get the right conda version inside of the container +# This logic is put inside of the container rather than in the build script +# so that we can easily do multi-platform builds + +arch=$1 +echo "arch", $arch +if [[ "$arch" == "x86_64" || "$arch" == "amd64" ]]; then + echo "Building the x86 Docker image" + wget https://repo.anaconda.com/miniconda/Miniconda3-py39_23.11.0-1-Linux-x86_64.sh -O miniconda.sh +elif [[ "$arch" == "aarch64" || "$arch" == "arm64" ]]; then + echo "Ayy, arm64 in the house!" + wget https://repo.anaconda.com/miniconda/Miniconda3-py39_23.11.0-1-Linux-aarch64.sh -O miniconda.sh +else + echo "unknown architecture detected?" + echo $arch + exit 1 +fi