From 4f838e1884a05dcaf7281b5e161bffdeb29e94e0 Mon Sep 17 00:00:00 2001 From: Yuxiang Wei Date: Wed, 16 Aug 2023 10:20:37 -0500 Subject: [PATCH] feat: better patch dumping --- data/considered_bugs.json | 275 ++++++++++++++++++++++++++++++++++++++ src/realm/d4j.py | 7 +- src/realm/generation.py | 4 +- src/realm/ploting.py | 17 ++- src/realm/repair.py | 4 +- 5 files changed, 298 insertions(+), 9 deletions(-) create mode 100644 data/considered_bugs.json diff --git a/data/considered_bugs.json b/data/considered_bugs.json new file mode 100644 index 0000000..fba828d --- /dev/null +++ b/data/considered_bugs.json @@ -0,0 +1,275 @@ +[ + "Chart-1", + "Chart-3", + "Chart-6", + "Chart-8", + "Chart-9", + "Chart-10", + "Chart-12", + "Chart-13", + "Chart-17", + "Chart-20", + "Chart-23", + "Chart-24", + "Cli-4", + "Cli-8", + "Cli-11", + "Cli-17", + "Cli-25", + "Cli-28", + "Cli-32", + "Cli-35", + "Cli-40", + "Closure-1", + "Closure-5", + "Closure-10", + "Closure-11", + "Closure-12", + "Closure-14", + "Closure-15", + "Closure-18", + "Closure-19", + "Closure-20", + "Closure-28", + "Closure-31", + "Closure-33", + "Closure-35", + "Closure-36", + "Closure-38", + "Closure-46", + "Closure-52", + "Closure-55", + "Closure-57", + "Closure-59", + "Closure-61", + "Closure-62", + "Closure-65", + "Closure-66", + "Closure-67", + "Closure-69", + "Closure-70", + "Closure-71", + "Closure-73", + "Closure-77", + "Closure-81", + "Closure-82", + "Closure-83", + "Closure-86", + "Closure-91", + "Closure-92", + "Closure-97", + "Closure-104", + "Closure-107", + "Closure-109", + "Closure-111", + "Closure-112", + "Closure-113", + "Closure-114", + "Closure-118", + "Closure-119", + "Closure-120", + "Closure-122", + "Closure-123", + "Closure-125", + "Closure-126", + "Closure-129", + "Closure-130", + "Closure-132", + "Closure-133", + "Closure-146", + "Closure-150", + "Closure-152", + "Closure-159", + "Closure-161", + "Closure-166", + "Closure-168", + "Codec-2", + "Codec-3", + "Codec-4", + "Codec-5", + "Codec-7", + "Codec-9", + "Codec-10", + "Codec-16", + "Codec-17", + "Codec-18", + "Collections-26", + "Collections-27", + "Collections-28", + "Compress-1", + "Compress-5", + "Compress-12", + "Compress-13", + "Compress-14", + "Compress-19", + "Compress-23", + "Compress-25", + "Compress-26", + "Compress-27", + "Compress-30", + "Compress-31", + "Compress-36", + "Compress-37", + "Compress-38", + "Compress-46", + "Csv-1", + "Csv-4", + "Csv-9", + "Csv-10", + "Csv-11", + "Csv-12", + "Csv-14", + "Csv-15", + "Gson-5", + "Gson-10", + "Gson-11", + "Gson-13", + "Gson-15", + "Gson-17", + "JacksonCore-5", + "JacksonCore-6", + "JacksonCore-7", + "JacksonCore-8", + "JacksonCore-11", + "JacksonCore-13", + "JacksonCore-20", + "JacksonCore-25", + "JacksonDatabind-1", + "JacksonDatabind-12", + "JacksonDatabind-16", + "JacksonDatabind-17", + "JacksonDatabind-19", + "JacksonDatabind-27", + "JacksonDatabind-33", + "JacksonDatabind-37", + "JacksonDatabind-42", + "JacksonDatabind-45", + "JacksonDatabind-46", + "JacksonDatabind-57", + "JacksonDatabind-62", + "JacksonDatabind-70", + "JacksonDatabind-71", + "JacksonDatabind-74", + "JacksonDatabind-76", + "JacksonDatabind-82", + "JacksonDatabind-84", + "JacksonDatabind-86", + "JacksonDatabind-93", + "JacksonDatabind-99", + "JacksonDatabind-102", + "Jsoup-1", + "Jsoup-2", + "Jsoup-5", + "Jsoup-9", + "Jsoup-10", + "Jsoup-13", + "Jsoup-15", + "Jsoup-17", + "Jsoup-19", + "Jsoup-20", + "Jsoup-24", + "Jsoup-25", + "Jsoup-26", + "Jsoup-32", + "Jsoup-33", + "Jsoup-34", + "Jsoup-35", + "Jsoup-37", + "Jsoup-39", + "Jsoup-40", + "Jsoup-41", + "Jsoup-43", + "Jsoup-45", + "Jsoup-46", + "Jsoup-51", + "Jsoup-54", + "Jsoup-55", + "Jsoup-57", + "Jsoup-61", + "Jsoup-62", + "Jsoup-68", + "Jsoup-69", + "Jsoup-72", + "Jsoup-75", + "Jsoup-76", + "Jsoup-77", + "Jsoup-84", + "Jsoup-86", + "Jsoup-88", + "Jsoup-90", + "Jsoup-93", + "JxPath-5", + "JxPath-10", + "JxPath-12", + "JxPath-21", + "Lang-6", + "Lang-9", + "Lang-11", + "Lang-16", + "Lang-21", + "Lang-22", + "Lang-24", + "Lang-25", + "Lang-26", + "Lang-29", + "Lang-33", + "Lang-38", + "Lang-39", + "Lang-40", + "Lang-43", + "Lang-44", + "Lang-45", + "Lang-49", + "Lang-51", + "Lang-52", + "Lang-54", + "Lang-57", + "Lang-58", + "Lang-59", + "Lang-61", + "Math-2", + "Math-3", + "Math-5", + "Math-9", + "Math-10", + "Math-11", + "Math-19", + "Math-20", + "Math-27", + "Math-30", + "Math-32", + "Math-33", + "Math-34", + "Math-39", + "Math-41", + "Math-45", + "Math-48", + "Math-50", + "Math-53", + "Math-56", + "Math-57", + "Math-58", + "Math-59", + "Math-63", + "Math-69", + "Math-70", + "Math-73", + "Math-75", + "Math-80", + "Math-82", + "Math-85", + "Math-91", + "Math-94", + "Math-96", + "Math-101", + "Math-104", + "Math-105", + "Time-4", + "Time-11", + "Time-14", + "Time-15", + "Time-16", + "Time-19", + "Time-24", + "Time-25" +] \ No newline at end of file diff --git a/src/realm/d4j.py b/src/realm/d4j.py index 5a479e6..88774ad 100644 --- a/src/realm/d4j.py +++ b/src/realm/d4j.py @@ -1,5 +1,6 @@ import csv import itertools +import json import multiprocessing as mp import os import subprocess @@ -189,7 +190,11 @@ def __init__( assert self.java8_home.exists() assert d4j_checkout_root.exists() self.metadata = self._get_metadata() - self.all_bugs = self._all_bugs() + all_bugs = self._all_bugs() + considered_bugs = json.loads(Path("data/considered_bugs.json").read_text()) + self.all_bugs = { + id: bug for id, bug in all_bugs.items() if id in considered_bugs + } self.single_hunk_bugs = { id: bug for (id, bug) in self.all_bugs.items() diff --git a/src/realm/generation.py b/src/realm/generation.py index 333fcd4..ac5b391 100644 --- a/src/realm/generation.py +++ b/src/realm/generation.py @@ -80,10 +80,12 @@ "long", "strictfp", "volatile", - "const" "float", + "const", + "float", "native", "super", "while", + "null", } # JDT.LS on Chart-11 is buggy diff --git a/src/realm/ploting.py b/src/realm/ploting.py index af068a0..d03ebd4 100644 --- a/src/realm/ploting.py +++ b/src/realm/ploting.py @@ -117,10 +117,11 @@ def generation_datapoint_getter(datapoint: GenerationDatapoint) -> list[int]: if os.getenv("DUMP") is not None: plausible_root = Path(os.getenv("DUMP_DIR") or "plausible_patches") + plausible_root = plausible_root / runner.report.root.name assert runner.report.transformed_result is not None transformed = runner.report.transformed_result.result_dict for proj, proj_values in runner.get_plausible_patches_grouped().items(): - plausible_root.mkdir(exist_ok=True) + plausible_root.mkdir(exist_ok=True, parents=True) plausible_dir = plausible_root / "1-plausible" correct_dir = plausible_root / "2-correct" plausible_dir.mkdir(exist_ok=True) @@ -144,7 +145,10 @@ def generation_datapoint_getter(datapoint: GenerationDatapoint) -> list[int]: utils.remove_java_comments(hunk) ) assert len(patch) == len(bugs) - if patch_hunk == ground_truth_hunk: + if patch_hunk == ground_truth_hunk and not correct: + inferred_correct = bug_id_dir / "inferred_correct.txt" + text = f"PATCH\n{hunk}\n\nGROUND TRUTH\n{ground_truth}" + inferred_correct.write_text(text) correct = True patch_content = concat_hunks(patch, delim=utils.HUNK_RULE) patch_strs.append(patch_content) @@ -164,17 +168,20 @@ def generation_datapoint_getter(datapoint: GenerationDatapoint) -> list[int]: diff = utils.DIFF_RULE.join(file_diffs) diff_file = (bug_id_dir / str(patch_id)).with_suffix(".diff") diff_file.write_text(diff) + correct_diff = bug_id_dir / "correct.diff" + if correct and not correct_diff.exists(): + correct_diff.write_text(diff) diffs.append(diff) integrated_file = bug_id_dir / "integrated.txt" integrated_file.write_text(utils.RULE.join(patch_strs)) - (bug_id_dir / f"reference.patch").write_text(D4J.get_patch(bug_id)) + (bug_id_dir / f"reference.reversed.patch").write_text( + D4J.get_patch(bug_id) + ) integrated_diff_file = bug_id_dir / "integrated.diff" integrated_diff_file.write_text(utils.RULE.join(diffs)) if correct: # move to correct_dir shutil.move(bug_id_dir, correct_dir) - else: - shutil.move(bug_id_dir, plausible_dir) # { # proj: [ diff --git a/src/realm/repair.py b/src/realm/repair.py index e241947..0f56ecc 100644 --- a/src/realm/repair.py +++ b/src/realm/repair.py @@ -319,8 +319,8 @@ def repair(self, report: Report): for bug_id, bug in bugs_to_repair.items(): gen.CHART_11 = bug_id == "Chart-11" # import json - if os.getenv("FIX_SEED_EVERY_BUG") is not None: - self.fix_seed() + # if os.getenv("FIX_SEED_EVERY_BUG") is not None: + self.fix_seed() self.repair_bug(report, bug_id, bug) # json.dump(DIAGNOSTICS, open("diagnostics.json", "w"), indent=2) # import json