Merge pull request #1369 from Codium-ai/tr/committable_comments

mrT23 · web-flow · commit d48d14dac78f · 2024-11-20T17:49:08.000+02:00
Tr/committable comments
diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py
@@ -1,4 +1,6 @@
+import difflib
 import json
+import re
 from typing import Optional, Tuple
 from urllib.parse import urlparse
 
@@ -72,24 +74,38 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
         post_parameters_list = []
         for suggestion in code_suggestions:
             body = suggestion["body"]
+            original_suggestion = suggestion.get('original_suggestion', None)  # needed for diff code
+            if original_suggestion:
+                try:
+                    existing_code = original_suggestion['existing_code'].rstrip() + "\n"
+                    improved_code = original_suggestion['improved_code'].rstrip() + "\n"
+                    diff = difflib.unified_diff(existing_code.split('\n'),
+                                                improved_code.split('\n'), n=999)
+                    patch_orig = "\n".join(diff)
+                    patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
+                    diff_code = f"\n\n```diff\n{patch.rstrip()}\n```"
+                    # replace ```suggestion ... ``` with diff_code, using regex:
+                    body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)
+                except Exception as e:
+                    get_logger().exception(f"Bitbucket failed to get diff code for publishing, error: {e}")
+                    continue
+
             relevant_file = suggestion["relevant_file"]
             relevant_lines_start = suggestion["relevant_lines_start"]
             relevant_lines_end = suggestion["relevant_lines_end"]
 
             if not relevant_lines_start or relevant_lines_start == -1:
-                if get_settings().config.verbosity_level >= 2:
-                    get_logger().exception(
-                        f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}"
-                    )
+                get_logger().exception(
+                    f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}"
+                )
                 continue
 
             if relevant_lines_end < relevant_lines_start:
-                if get_settings().config.verbosity_level >= 2:
-                    get_logger().exception(
-                        f"Failed to publish code suggestion, "
-                        f"relevant_lines_end is {relevant_lines_end} and "
-                        f"relevant_lines_start is {relevant_lines_start}"
-                    )
+                get_logger().exception(
+                    f"Failed to publish code suggestion, "
+                    f"relevant_lines_end is {relevant_lines_end} and "
+                    f"relevant_lines_start is {relevant_lines_start}"
+                )
                 continue
 
             if relevant_lines_end > relevant_lines_start:
@@ -113,16 +129,15 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
             self.publish_inline_comments(post_parameters_list)
             return True
         except Exception as e:
-            if get_settings().config.verbosity_level >= 2:
-                get_logger().error(f"Failed to publish code suggestion, error: {e}")
+            get_logger().error(f"Bitbucket failed to publish code suggestion, error: {e}")
             return False
 
     def publish_file_comments(self, file_comments: list) -> bool:
         pass
 
     def is_supported(self, capability: str) -> bool:
         if capability in ['get_issue_comments', 'publish_inline_comments', 'get_labels', 'gfm_markdown',
-                          'publish_file_comments']:
+                            'publish_file_comments']:
             return False
         return True
 
diff --git a/pr_agent/git_providers/bitbucket_server_provider.py b/pr_agent/git_providers/bitbucket_server_provider.py
@@ -1,3 +1,6 @@
+import difflib
+import re
+
 from packaging.version import parse as parse_version
 from typing import Optional, Tuple
 from urllib.parse import quote_plus, urlparse
@@ -67,24 +70,37 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
         post_parameters_list = []
         for suggestion in code_suggestions:
             body = suggestion["body"]
+            original_suggestion = suggestion.get('original_suggestion', None)  # needed for diff code
+            if original_suggestion:
+                try:
+                    existing_code = original_suggestion['existing_code'].rstrip() + "\n"
+                    improved_code = original_suggestion['improved_code'].rstrip() + "\n"
+                    diff = difflib.unified_diff(existing_code.split('\n'),
+                                                improved_code.split('\n'), n=999)
+                    patch_orig = "\n".join(diff)
+                    patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
+                    diff_code = f"\n\n```diff\n{patch.rstrip()}\n```"
+                    # replace ```suggestion ... ``` with diff_code, using regex:
+                    body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)
+                except Exception as e:
+                    get_logger().exception(f"Bitbucket failed to get diff code for publishing, error: {e}")
+                    continue
             relevant_file = suggestion["relevant_file"]
             relevant_lines_start = suggestion["relevant_lines_start"]
             relevant_lines_end = suggestion["relevant_lines_end"]
 
             if not relevant_lines_start or relevant_lines_start == -1:
-                if get_settings().config.verbosity_level >= 2:
-                    get_logger().exception(
-                        f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}"
-                    )
+                get_logger().warning(
+                    f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}"
+                )
                 continue
 
             if relevant_lines_end < relevant_lines_start:
-                if get_settings().config.verbosity_level >= 2:
-                    get_logger().exception(
-                        f"Failed to publish code suggestion, "
-                        f"relevant_lines_end is {relevant_lines_end} and "
-                        f"relevant_lines_start is {relevant_lines_start}"
-                    )
+                get_logger().warning(
+                    f"Failed to publish code suggestion, "
+                    f"relevant_lines_end is {relevant_lines_end} and "
+                    f"relevant_lines_start is {relevant_lines_start}"
+                )
                 continue
 
             if relevant_lines_end > relevant_lines_start:
diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py
@@ -1,5 +1,8 @@
+import copy
+import difflib
 import hashlib
 import itertools
+import re
 import time
 import traceback
 from datetime import datetime
@@ -11,6 +14,7 @@
 from starlette_context import context
 
 from ..algo.file_filter import filter_ignored
+from ..algo.git_patch_processing import extract_hunk_headers
 from ..algo.language_handler import is_valid_file
 from ..algo.types import EDIT_TYPE
 from ..algo.utils import (PRReviewHeader, Range, clip_tokens,
@@ -415,7 +419,10 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
         Publishes code suggestions as comments on the PR.
         """
         post_parameters_list = []
-        for suggestion in code_suggestions:
+
+        code_suggestions_validated = self.validate_comments_inside_hunks(code_suggestions)
+
+        for suggestion in code_suggestions_validated:
             body = suggestion['body']
             relevant_file = suggestion['relevant_file']
             relevant_lines_start = suggestion['relevant_lines_start']
@@ -872,3 +879,100 @@ def auto_approve(self) -> bool:
 
     def calc_pr_statistics(self, pull_request_data: dict):
             return {}
+
+    def validate_comments_inside_hunks(self, code_suggestions):
+        """
+        validate that all committable comments are inside PR hunks - this is a must for committable comments in GitHub
+        """
+        code_suggestions_copy = copy.deepcopy(code_suggestions)
+        diff_files = self.get_diff_files()
+        RE_HUNK_HEADER = re.compile(
+            r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
+
+        # map file extensions to programming languages
+        language_extension_map_org = get_settings().language_extension_map_org
+        extension_to_language = {}
+        for language, extensions in language_extension_map_org.items():
+            for ext in extensions:
+                extension_to_language[ext] = language
+        for file in diff_files:
+            extension_s = '.' + file.filename.rsplit('.')[-1]
+            language_name = "txt"
+            if extension_s and (extension_s in extension_to_language):
+                language_name = extension_to_language[extension_s]
+            file.language = language_name.lower()
+
+        for suggestion in code_suggestions_copy:
+            try:
+                relevant_file_path = suggestion['relevant_file']
+                for file in diff_files:
+                    if file.filename == relevant_file_path:
+
+                        # generate on-demand the patches range for the relevant file
+                        patch_str = file.patch
+                        if not hasattr(file, 'patches_range'):
+                            file.patches_range = []
+                            patch_lines = patch_str.splitlines()
+                            for i, line in enumerate(patch_lines):
+                                if line.startswith('@@'):
+                                    match = RE_HUNK_HEADER.match(line)
+                                    # identify hunk header
+                                    if match:
+                                        section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
+                                        file.patches_range.append({'start': start2, 'end': start2 + size2 - 1})
+
+                        patches_range = file.patches_range
+                        comment_start_line = suggestion.get('relevant_lines_start', None)
+                        comment_end_line = suggestion.get('relevant_lines_end', None)
+                        original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code
+                        if not comment_start_line or not comment_end_line or not original_suggestion:
+                            continue
+
+                        # check if the comment is inside a valid hunk
+                        is_valid_hunk = False
+                        min_distance = float('inf')
+                        patch_range_min = None
+                        # find the hunk that contains the comment, or the closest one
+                        for i, patch_range in enumerate(patches_range):
+                            d1 = comment_start_line - patch_range['start']
+                            d2 = patch_range['end'] - comment_end_line
+                            if d1 >= 0 and d2 >= 0:  # found a valid hunk
+                                is_valid_hunk = True
+                                min_distance = 0
+                                patch_range_min = patch_range
+                                break
+                            elif d1 * d2 <= 0:  # comment is possibly inside the hunk
+                                d1_clip = abs(min(0, d1))
+                                d2_clip = abs(min(0, d2))
+                                d = max(d1_clip, d2_clip)
+                                if d < min_distance:
+                                    patch_range_min = patch_range
+                                    min_distance = min(min_distance, d)
+                        if not is_valid_hunk:
+                            if min_distance < 10:  # 10 lines - a reasonable distance to consider the comment inside the hunk
+                                # make the suggestion non-committable, yet multi line
+                                suggestion['relevant_lines_start'] = max(suggestion['relevant_lines_start'], patch_range_min['start'])
+                                suggestion['relevant_lines_end'] = min(suggestion['relevant_lines_end'], patch_range_min['end'])
+                                body = suggestion['body'].strip()
+
+                                # present new diff code in collapsible
+                                existing_code = original_suggestion['existing_code'].rstrip() + "\n"
+                                improved_code = original_suggestion['improved_code'].rstrip() + "\n"
+                                diff = difflib.unified_diff(existing_code.split('\n'),
+                                                            improved_code.split('\n'), n=999)
+                                patch_orig = "\n".join(diff)
+                                patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
+                                diff_code = f"\n\n<details><summary>New proposed code:</summary>\n\n```diff\n{patch.rstrip()}\n```"
+                                # replace ```suggestion ... ``` with diff_code, using regex:
+                                body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)
+                                body += "\n\n</details>"
+                                suggestion['body'] = body
+                                get_logger().info(f"Comment was moved to a valid hunk, "
+                                                  f"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}")
+                            else:
+                                get_logger().error(f"Comment is not inside a valid hunk, "
+                                                   f"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}")
+            except Exception as e:
+                get_logger().error(f"Failed to process patch for committable comment, error: {e}")
+        return code_suggestions_copy
+
diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py
@@ -1,3 +1,4 @@
+import difflib
 import hashlib
 import re
 from typing import Optional, Tuple
@@ -278,20 +279,23 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f
                         new_code_snippet = original_suggestion['improved_code']
                         content = original_suggestion['suggestion_content']
                         label = original_suggestion['label']
-                        if 'score' in original_suggestion:
-                            score = original_suggestion['score']
-                        else:
-                            score = 7
+                        score = original_suggestion.get('score', 7)
 
                     if hasattr(self, 'main_language'):
                         language = self.main_language
                     else:
                         language = ''
                     link = self.get_line_link(relevant_file, line_start, line_end)
-                    body_fallback =f"**Suggestion:** {content} [{label}, importance: {score}]\n___\n"
-                    body_fallback +=f"\n\nReplace  lines ([{line_start}-{line_end}]({link}))\n\n```{language}\n{old_code_snippet}\n````\n\n"
-                    body_fallback +=f"with\n\n```{language}\n{new_code_snippet}\n````"
-                    body_fallback += f"\n\n___\n\n`(Cannot implement this suggestion directly, as gitlab API does not enable committing to a non -+ line in a PR)`"
+                    body_fallback =f"**Suggestion:** {content} [{label}, importance: {score}]\n\n"
+                    body_fallback +=f"\n\n<details><summary>[{target_file.filename} [{line_start}-{line_end}]]({link}):</summary>\n\n"
+                    body_fallback += f"\n\n___\n\n`(Cannot implement directly - GitLab API allows committable suggestions strictly on MR diff lines)`"
+                    body_fallback+="</details>\n\n"
+                    diff_patch = difflib.unified_diff(old_code_snippet.split('\n'),
+                                                new_code_snippet.split('\n'), n=999)
+                    patch_orig = "\n".join(diff_patch)
+                    patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
+                    diff_code = f"\n\n```diff\n{patch.rstrip()}\n```"
+                    body_fallback += diff_code
 
                     # Create a general note on the file in the MR
                     self.mr.notes.create({
@@ -304,6 +308,7 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f
                             'file_path': f'{target_file.filename}',
                         }
                     })
+                    get_logger().debug(f"Created fallback comment in MR {self.id_mr} with position {pos_obj}")
 
                     # get_logger().debug(
                     #     f"Failed to create comment in MR {self.id_mr} with position {pos_obj} (probably not a '+' line)")