Skip to content

Commit d48d14d

Browse files
authored
Merge pull request #1369 from Codium-ai/tr/committable_comments
Tr/committable comments
2 parents b1336e7 + eb0c959 commit d48d14d

File tree

4 files changed

+172
-32
lines changed

4 files changed

+172
-32
lines changed

pr_agent/git_providers/bitbucket_provider.py

+28-13
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import difflib
12
import json
3+
import re
24
from typing import Optional, Tuple
35
from urllib.parse import urlparse
46

@@ -72,24 +74,38 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
7274
post_parameters_list = []
7375
for suggestion in code_suggestions:
7476
body = suggestion["body"]
77+
original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code
78+
if original_suggestion:
79+
try:
80+
existing_code = original_suggestion['existing_code'].rstrip() + "\n"
81+
improved_code = original_suggestion['improved_code'].rstrip() + "\n"
82+
diff = difflib.unified_diff(existing_code.split('\n'),
83+
improved_code.split('\n'), n=999)
84+
patch_orig = "\n".join(diff)
85+
patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
86+
diff_code = f"\n\n```diff\n{patch.rstrip()}\n```"
87+
# replace ```suggestion ... ``` with diff_code, using regex:
88+
body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)
89+
except Exception as e:
90+
get_logger().exception(f"Bitbucket failed to get diff code for publishing, error: {e}")
91+
continue
92+
7593
relevant_file = suggestion["relevant_file"]
7694
relevant_lines_start = suggestion["relevant_lines_start"]
7795
relevant_lines_end = suggestion["relevant_lines_end"]
7896

7997
if not relevant_lines_start or relevant_lines_start == -1:
80-
if get_settings().config.verbosity_level >= 2:
81-
get_logger().exception(
82-
f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}"
83-
)
98+
get_logger().exception(
99+
f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}"
100+
)
84101
continue
85102

86103
if relevant_lines_end < relevant_lines_start:
87-
if get_settings().config.verbosity_level >= 2:
88-
get_logger().exception(
89-
f"Failed to publish code suggestion, "
90-
f"relevant_lines_end is {relevant_lines_end} and "
91-
f"relevant_lines_start is {relevant_lines_start}"
92-
)
104+
get_logger().exception(
105+
f"Failed to publish code suggestion, "
106+
f"relevant_lines_end is {relevant_lines_end} and "
107+
f"relevant_lines_start is {relevant_lines_start}"
108+
)
93109
continue
94110

95111
if relevant_lines_end > relevant_lines_start:
@@ -113,16 +129,15 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
113129
self.publish_inline_comments(post_parameters_list)
114130
return True
115131
except Exception as e:
116-
if get_settings().config.verbosity_level >= 2:
117-
get_logger().error(f"Failed to publish code suggestion, error: {e}")
132+
get_logger().error(f"Bitbucket failed to publish code suggestion, error: {e}")
118133
return False
119134

120135
def publish_file_comments(self, file_comments: list) -> bool:
121136
pass
122137

123138
def is_supported(self, capability: str) -> bool:
124139
if capability in ['get_issue_comments', 'publish_inline_comments', 'get_labels', 'gfm_markdown',
125-
'publish_file_comments']:
140+
'publish_file_comments']:
126141
return False
127142
return True
128143

pr_agent/git_providers/bitbucket_server_provider.py

+26-10
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import difflib
2+
import re
3+
14
from packaging.version import parse as parse_version
25
from typing import Optional, Tuple
36
from urllib.parse import quote_plus, urlparse
@@ -67,24 +70,37 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
6770
post_parameters_list = []
6871
for suggestion in code_suggestions:
6972
body = suggestion["body"]
73+
original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code
74+
if original_suggestion:
75+
try:
76+
existing_code = original_suggestion['existing_code'].rstrip() + "\n"
77+
improved_code = original_suggestion['improved_code'].rstrip() + "\n"
78+
diff = difflib.unified_diff(existing_code.split('\n'),
79+
improved_code.split('\n'), n=999)
80+
patch_orig = "\n".join(diff)
81+
patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
82+
diff_code = f"\n\n```diff\n{patch.rstrip()}\n```"
83+
# replace ```suggestion ... ``` with diff_code, using regex:
84+
body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)
85+
except Exception as e:
86+
get_logger().exception(f"Bitbucket failed to get diff code for publishing, error: {e}")
87+
continue
7088
relevant_file = suggestion["relevant_file"]
7189
relevant_lines_start = suggestion["relevant_lines_start"]
7290
relevant_lines_end = suggestion["relevant_lines_end"]
7391

7492
if not relevant_lines_start or relevant_lines_start == -1:
75-
if get_settings().config.verbosity_level >= 2:
76-
get_logger().exception(
77-
f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}"
78-
)
93+
get_logger().warning(
94+
f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}"
95+
)
7996
continue
8097

8198
if relevant_lines_end < relevant_lines_start:
82-
if get_settings().config.verbosity_level >= 2:
83-
get_logger().exception(
84-
f"Failed to publish code suggestion, "
85-
f"relevant_lines_end is {relevant_lines_end} and "
86-
f"relevant_lines_start is {relevant_lines_start}"
87-
)
99+
get_logger().warning(
100+
f"Failed to publish code suggestion, "
101+
f"relevant_lines_end is {relevant_lines_end} and "
102+
f"relevant_lines_start is {relevant_lines_start}"
103+
)
88104
continue
89105

90106
if relevant_lines_end > relevant_lines_start:

pr_agent/git_providers/github_provider.py

+105-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
import copy
2+
import difflib
13
import hashlib
24
import itertools
5+
import re
36
import time
47
import traceback
58
from datetime import datetime
@@ -11,6 +14,7 @@
1114
from starlette_context import context
1215

1316
from ..algo.file_filter import filter_ignored
17+
from ..algo.git_patch_processing import extract_hunk_headers
1418
from ..algo.language_handler import is_valid_file
1519
from ..algo.types import EDIT_TYPE
1620
from ..algo.utils import (PRReviewHeader, Range, clip_tokens,
@@ -415,7 +419,10 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
415419
Publishes code suggestions as comments on the PR.
416420
"""
417421
post_parameters_list = []
418-
for suggestion in code_suggestions:
422+
423+
code_suggestions_validated = self.validate_comments_inside_hunks(code_suggestions)
424+
425+
for suggestion in code_suggestions_validated:
419426
body = suggestion['body']
420427
relevant_file = suggestion['relevant_file']
421428
relevant_lines_start = suggestion['relevant_lines_start']
@@ -872,3 +879,100 @@ def auto_approve(self) -> bool:
872879

873880
def calc_pr_statistics(self, pull_request_data: dict):
874881
return {}
882+
883+
def validate_comments_inside_hunks(self, code_suggestions):
884+
"""
885+
validate that all committable comments are inside PR hunks - this is a must for committable comments in GitHub
886+
"""
887+
code_suggestions_copy = copy.deepcopy(code_suggestions)
888+
diff_files = self.get_diff_files()
889+
RE_HUNK_HEADER = re.compile(
890+
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
891+
892+
# map file extensions to programming languages
893+
language_extension_map_org = get_settings().language_extension_map_org
894+
extension_to_language = {}
895+
for language, extensions in language_extension_map_org.items():
896+
for ext in extensions:
897+
extension_to_language[ext] = language
898+
for file in diff_files:
899+
extension_s = '.' + file.filename.rsplit('.')[-1]
900+
language_name = "txt"
901+
if extension_s and (extension_s in extension_to_language):
902+
language_name = extension_to_language[extension_s]
903+
file.language = language_name.lower()
904+
905+
for suggestion in code_suggestions_copy:
906+
try:
907+
relevant_file_path = suggestion['relevant_file']
908+
for file in diff_files:
909+
if file.filename == relevant_file_path:
910+
911+
# generate on-demand the patches range for the relevant file
912+
patch_str = file.patch
913+
if not hasattr(file, 'patches_range'):
914+
file.patches_range = []
915+
patch_lines = patch_str.splitlines()
916+
for i, line in enumerate(patch_lines):
917+
if line.startswith('@@'):
918+
match = RE_HUNK_HEADER.match(line)
919+
# identify hunk header
920+
if match:
921+
section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
922+
file.patches_range.append({'start': start2, 'end': start2 + size2 - 1})
923+
924+
patches_range = file.patches_range
925+
comment_start_line = suggestion.get('relevant_lines_start', None)
926+
comment_end_line = suggestion.get('relevant_lines_end', None)
927+
original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code
928+
if not comment_start_line or not comment_end_line or not original_suggestion:
929+
continue
930+
931+
# check if the comment is inside a valid hunk
932+
is_valid_hunk = False
933+
min_distance = float('inf')
934+
patch_range_min = None
935+
# find the hunk that contains the comment, or the closest one
936+
for i, patch_range in enumerate(patches_range):
937+
d1 = comment_start_line - patch_range['start']
938+
d2 = patch_range['end'] - comment_end_line
939+
if d1 >= 0 and d2 >= 0: # found a valid hunk
940+
is_valid_hunk = True
941+
min_distance = 0
942+
patch_range_min = patch_range
943+
break
944+
elif d1 * d2 <= 0: # comment is possibly inside the hunk
945+
d1_clip = abs(min(0, d1))
946+
d2_clip = abs(min(0, d2))
947+
d = max(d1_clip, d2_clip)
948+
if d < min_distance:
949+
patch_range_min = patch_range
950+
min_distance = min(min_distance, d)
951+
if not is_valid_hunk:
952+
if min_distance < 10: # 10 lines - a reasonable distance to consider the comment inside the hunk
953+
# make the suggestion non-committable, yet multi line
954+
suggestion['relevant_lines_start'] = max(suggestion['relevant_lines_start'], patch_range_min['start'])
955+
suggestion['relevant_lines_end'] = min(suggestion['relevant_lines_end'], patch_range_min['end'])
956+
body = suggestion['body'].strip()
957+
958+
# present new diff code in collapsible
959+
existing_code = original_suggestion['existing_code'].rstrip() + "\n"
960+
improved_code = original_suggestion['improved_code'].rstrip() + "\n"
961+
diff = difflib.unified_diff(existing_code.split('\n'),
962+
improved_code.split('\n'), n=999)
963+
patch_orig = "\n".join(diff)
964+
patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
965+
diff_code = f"\n\n<details><summary>New proposed code:</summary>\n\n```diff\n{patch.rstrip()}\n```"
966+
# replace ```suggestion ... ``` with diff_code, using regex:
967+
body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)
968+
body += "\n\n</details>"
969+
suggestion['body'] = body
970+
get_logger().info(f"Comment was moved to a valid hunk, "
971+
f"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}")
972+
else:
973+
get_logger().error(f"Comment is not inside a valid hunk, "
974+
f"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}")
975+
except Exception as e:
976+
get_logger().error(f"Failed to process patch for committable comment, error: {e}")
977+
return code_suggestions_copy
978+

pr_agent/git_providers/gitlab_provider.py

+13-8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import difflib
12
import hashlib
23
import re
34
from typing import Optional, Tuple
@@ -278,20 +279,23 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f
278279
new_code_snippet = original_suggestion['improved_code']
279280
content = original_suggestion['suggestion_content']
280281
label = original_suggestion['label']
281-
if 'score' in original_suggestion:
282-
score = original_suggestion['score']
283-
else:
284-
score = 7
282+
score = original_suggestion.get('score', 7)
285283

286284
if hasattr(self, 'main_language'):
287285
language = self.main_language
288286
else:
289287
language = ''
290288
link = self.get_line_link(relevant_file, line_start, line_end)
291-
body_fallback =f"**Suggestion:** {content} [{label}, importance: {score}]\n___\n"
292-
body_fallback +=f"\n\nReplace lines ([{line_start}-{line_end}]({link}))\n\n```{language}\n{old_code_snippet}\n````\n\n"
293-
body_fallback +=f"with\n\n```{language}\n{new_code_snippet}\n````"
294-
body_fallback += f"\n\n___\n\n`(Cannot implement this suggestion directly, as gitlab API does not enable committing to a non -+ line in a PR)`"
289+
body_fallback =f"**Suggestion:** {content} [{label}, importance: {score}]\n\n"
290+
body_fallback +=f"\n\n<details><summary>[{target_file.filename} [{line_start}-{line_end}]]({link}):</summary>\n\n"
291+
body_fallback += f"\n\n___\n\n`(Cannot implement directly - GitLab API allows committable suggestions strictly on MR diff lines)`"
292+
body_fallback+="</details>\n\n"
293+
diff_patch = difflib.unified_diff(old_code_snippet.split('\n'),
294+
new_code_snippet.split('\n'), n=999)
295+
patch_orig = "\n".join(diff_patch)
296+
patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
297+
diff_code = f"\n\n```diff\n{patch.rstrip()}\n```"
298+
body_fallback += diff_code
295299

296300
# Create a general note on the file in the MR
297301
self.mr.notes.create({
@@ -304,6 +308,7 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f
304308
'file_path': f'{target_file.filename}',
305309
}
306310
})
311+
get_logger().debug(f"Created fallback comment in MR {self.id_mr} with position {pos_obj}")
307312

308313
# get_logger().debug(
309314
# f"Failed to create comment in MR {self.id_mr} with position {pos_obj} (probably not a '+' line)")

0 commit comments

Comments
 (0)