Skip to content

Commit

Permalink
feat: automatically detect string matching correct patches
Browse files Browse the repository at this point in the history
  • Loading branch information
UniverseFly committed Aug 15, 2023
1 parent 4220f4e commit 40088cc
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 5 deletions.
6 changes: 6 additions & 0 deletions src/realm/d4j.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,12 @@ def iter_hunks(self) -> Iterator[tuple[tuple[int, int], BuggyFile, Change]]:
for idx_j, change in enumerate(reversed(buggy_file.changes)):
yield ((idx_i, idx_j), buggy_file, change)

def single_hunk_content(self) -> str:
    """Return the added lines of the bug's only hunk as one string.

    Valid only for single-hunk bugs: there must be exactly one buggy
    file and that file must have exactly one change. Either condition
    failing trips an assertion.
    """
    files = self.buggy_files
    assert len(files) == 1
    hunks = files[0].changes
    assert len(hunks) == 1
    # The added lines are concatenated as-is, with no separator inserted.
    only_hunk = hunks[0]
    return "".join(only_hunk.added_lines)

def all_changes(self) -> list[list[tuple[int, int]]]:
all_values: list[list[tuple[int, int]]] = []
for buggy_file in self.buggy_files:
Expand Down
25 changes: 24 additions & 1 deletion src/realm/ploting.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import os
import shutil
from itertools import groupby
from pathlib import Path
from typing import Callable, Iterable, TypeVar, cast
Expand Down Expand Up @@ -112,22 +113,39 @@ def generation_datapoint_getter(datapoint: GenerationDatapoint) -> list[int]:
}
for proj, proj_values in runner.get_plausible_patches_grouped().items()
}
# D4J.all_bugs[bug_id].iter_hunks

if os.getenv("DUMP") is not None:
plausible_root = Path(os.getenv("DUMP_DIR") or "plausible_patches")
assert runner.report.transformed_result is not None
transformed = runner.report.transformed_result.result_dict
for proj, proj_values in runner.get_plausible_patches_grouped().items():
plausible_root.mkdir(exist_ok=True)
plausible_dir = plausible_root / "1-plausible"
correct_dir = plausible_root / "2-correct"
plausible_dir.mkdir(exist_ok=True)
correct_dir.mkdir(exist_ok=True)
for bug_id, patches in proj_values.items():
ground_truth = D4J.single_hunk_bugs[bug_id].single_hunk_content()
ground_truth_hunk = utils.remove_whitespace(
utils.remove_java_comments(ground_truth)
)
bug_id_dir = plausible_root / bug_id
bug_id_dir.mkdir()
patch_strs: list[str] = []
bugs, _ = transformed[bug_id]
# assert len(bugs) == 1
assert len(bugs) == 1
diffs: list[str] = []
correct = False
for patch_id, patch in enumerate(patches):
hunk = patch[0].hunks[0].result.hunk
assert hunk is not None
patch_hunk = utils.remove_whitespace(
utils.remove_java_comments(hunk)
)
assert len(patch) == len(bugs)
if patch_hunk == ground_truth_hunk:
correct = True
patch_content = concat_hunks(patch, delim=utils.HUNK_RULE)
patch_strs.append(patch_content)
patch_file = (bug_id_dir / str(patch_id)).with_suffix(".txt")
Expand All @@ -152,6 +170,11 @@ def generation_datapoint_getter(datapoint: GenerationDatapoint) -> list[int]:
(bug_id_dir / f"reference.patch").write_text(D4J.get_patch(bug_id))
integrated_diff_file = bug_id_dir / "integrated.diff"
integrated_diff_file.write_text(utils.RULE.join(diffs))
if correct:
# move to correct_dir
shutil.move(bug_id_dir, correct_dir)
else:
shutil.move(bug_id_dir, plausible_dir)

# {
# proj: [
Expand Down
6 changes: 2 additions & 4 deletions src/realm/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ def evaluate_generation_summary(self) -> GenerationDatapoint:


def validate_patch(
d4j: Defects4J, bug_id: str, bugs: list[TextFile], patch: AvgPatch, dirty: bool
d4j: Defects4J, bug_id: str, bugs: list[TextFile], patch: AvgPatch
) -> PatchValidationResult:
start_time = time.perf_counter()
assert not patch.is_duplicate
Expand Down Expand Up @@ -647,9 +647,7 @@ def validate_proj(
val_results[patch_idx] = cached[ws_removed_hunk_str]
n_validated += 1
continue
val_result = validate_patch(
d4j, bug_id, buggy_files, patch, dirty=(idx != 0)
)
val_result = validate_patch(d4j, bug_id, buggy_files, patch)
cached[ws_removed_hunk_str] = val_result
assert patch_idx not in val_results
val_results[patch_idx] = val_result
Expand Down

0 comments on commit 40088cc

Please sign in to comment.