Skip to content

Commit 4d05ba9

Browse files
committed
2 parents 1289d53 + 617b5bd commit 4d05ba9

File tree

3 files changed: +35 -9 lines changed

README.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,11 @@ Here are some tips to speed up the evaluation:
320320
You can inspect the failed samples by using the following command:
321321
322322
```bash
323-
bigcodebench.inspect --eval-results sample-sanitized-calibrated_eval_results.json --in-place
323+
# Inspect the failed samples and save the results to `inspect/`
324+
bigcodebench.inspect --eval_results sample-sanitized-calibrated_eval_results.json --split complete --subset hard
325+
326+
# Re-run the inspection in place
327+
bigcodebench.inspect --eval_results sample-sanitized-calibrated_eval_results.json --split complete --subset hard --in_place
324328
```
325329
326330
## 🚀 Full Script

bigcodebench/eval/utils.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import tempfile
3030
import subprocess
3131
import multiprocessing
32+
import time
3233
from typing import Optional
3334

3435
TIMEOUT_LIMIT=240.0
@@ -141,7 +142,7 @@ def safe_kill(pid, sig):
141142
else:
142143
print(f"Prevented attempt to kill PID {pid} with signal {sig}")
143144
except ProcessLookupError:
144-
print(f"Process {pid} does not exist.")
145+
pass
145146

146147
def safe_killpg(pgid, sig):
147148
if pgid == current_pgid or pgid in {os.getpgid(pid) for pid in child_pids}:
@@ -221,7 +222,22 @@ def safe_exec(*args, **kwargs):
221222
try:
222223
yield
223224
finally:
224-
# Restore original functions after the block
225+
for pid in child_pids:
226+
try:
227+
os.kill(pid, signal.SIGTERM)
228+
for _ in range(10):
229+
time.sleep(0.1)
230+
try:
231+
os.kill(pid, 0)
232+
except ProcessLookupError:
233+
break
234+
else:
235+
os.kill(pid, signal.SIGKILL)
236+
except ProcessLookupError:
237+
pass
238+
except Exception as e:
239+
print(f"Error handling process {pid}: {e}")
240+
225241
os.kill = original_kill
226242
os.killpg = original_killpg
227243
os.system = original_system

bigcodebench/inspect.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -14,23 +14,25 @@ def inspection(args):
1414
-- completion.py: prompt + completion
1515
-- execution_trace.txt: execution trace
1616
"""
17-
path = os.path.join("inspect", args.eval_results.split("/")[-1].replace(".json", ""))
17+
path = os.path.join(args.save_path, args.eval_results.split("/")[-1].replace(".json", ""))
1818
if args.in_place:
1919
shutil.rmtree(path, ignore_errors=True)
2020
if not os.path.exists(path):
2121
os.makedirs(path)
22-
problems = get_bigcodebench()
22+
problems = get_bigcodebench(subset=args.subset)
2323

2424
eval_results = json.load(open(args.eval_results, "r"))
2525
for task_id, results in eval_results["eval"].items():
26+
if task_id not in problems:
27+
continue
2628
if all(result["status"] == "pass" for result in results):
2729
continue
2830
task_path = os.path.join(path, task_id)
2931
if not os.path.exists(task_path):
3032
os.makedirs(task_path)
3133
task_id_data = problems[task_id]
3234
with open(os.path.join(task_path, "ground_truth.py"), "w") as f:
33-
f.write(task_id_data[f"{args.subset}_prompt"] + "\n\n" + task_id_data["canonical_solution"])
35+
f.write(task_id_data[f"{args.split}_prompt"] + "\n\n" + task_id_data["canonical_solution"])
3436

3537
# write test
3638
with open(os.path.join(task_path, "test_case.py"), "w") as f:
@@ -48,9 +50,13 @@ def inspection(args):
4850
f.write("="*50 + "\n")
4951
def main():
5052
parser = argparse.ArgumentParser()
51-
parser.add_argument("--eval-results", required=True, type=str)
52-
parser.add_argument("--subset", required=True, type=str)
53-
parser.add_argument("--in-place", action="store_true")
53+
parser.add_argument("--eval_results", required=True, type=str)
54+
parser.add_argument(
55+
"--split", required=True, type=str, choices=["complete", "instruct"]
56+
)
57+
parser.add_argument("--subset", default="hard", type=str, choices=["full", "hard"])
58+
parser.add_argument("--save_path", default="inspect", type=str)
59+
parser.add_argument("--in_place", action="store_true")
5460
args = parser.parse_args()
5561

5662
inspection(args)

0 commit comments

Comments
 (0)