diff --git a/benchmarks/swebench/eval_infer.py b/benchmarks/swebench/eval_infer.py index 10782e6c..206a8076 100644 --- a/benchmarks/swebench/eval_infer.py +++ b/benchmarks/swebench/eval_infer.py @@ -117,11 +117,11 @@ def convert_to_swebench_format(input_file: str, output_file: str) -> None: def run_swebench_evaluation( predictions_file: str, run_id: str, - dataset: str = EVAL_DEFAULTS["dataset"], - workers: int = EVAL_DEFAULTS["workers"], - split: str = EVAL_DEFAULTS["split"], - modal: bool = EVAL_DEFAULTS["modal"], - timeout: int = EVAL_DEFAULTS["timeout"], + dataset: str, + workers: int, + split: str, + modal: bool, + timeout: int, ) -> None: """ Run SWE-Bench evaluation on the predictions file. diff --git a/benchmarks/swtbench/eval_infer.py b/benchmarks/swtbench/eval_infer.py index c245aa42..e5bdc9b5 100644 --- a/benchmarks/swtbench/eval_infer.py +++ b/benchmarks/swtbench/eval_infer.py @@ -69,7 +69,7 @@ def _load_prediction_instance_ids(predictions_file: Path) -> list[str]: def try_pull_prebaked_images( predictions_file: Path, dataset: str, - split: str = EVAL_DEFAULTS["split"], + split: str, registry: str = PREBAKED_REGISTRY, ) -> None: """ @@ -418,6 +418,7 @@ def main() -> None: try_pull_prebaked_images( output_file, args.dataset, + args.split, ) else: logger.info(