|
| 1 | +import copy |
| 2 | +import difflib |
1 | 3 | import hashlib
|
2 | 4 | import itertools
|
| 5 | +import re |
3 | 6 | import time
|
4 | 7 | import traceback
|
5 | 8 | from datetime import datetime
|
|
11 | 14 | from starlette_context import context
|
12 | 15 |
|
13 | 16 | from ..algo.file_filter import filter_ignored
|
| 17 | +from ..algo.git_patch_processing import extract_hunk_headers |
14 | 18 | from ..algo.language_handler import is_valid_file
|
15 | 19 | from ..algo.types import EDIT_TYPE
|
16 | 20 | from ..algo.utils import (PRReviewHeader, Range, clip_tokens,
|
@@ -415,7 +419,10 @@ def publish_code_suggestions(self, code_suggestions: list) -> bool:
|
415 | 419 | Publishes code suggestions as comments on the PR.
|
416 | 420 | """
|
417 | 421 | post_parameters_list = []
|
418 |
| - for suggestion in code_suggestions: |
| 422 | + |
| 423 | + code_suggestions_validated = self.validate_comments_inside_hunks(code_suggestions) |
| 424 | + |
| 425 | + for suggestion in code_suggestions_validated: |
419 | 426 | body = suggestion['body']
|
420 | 427 | relevant_file = suggestion['relevant_file']
|
421 | 428 | relevant_lines_start = suggestion['relevant_lines_start']
|
@@ -872,3 +879,100 @@ def auto_approve(self) -> bool:
|
872 | 879 |
|
def calc_pr_statistics(self, pull_request_data: dict):
    """
    Return statistics for a pull request.

    This implementation does not inspect `pull_request_data`; it always
    returns a new, empty dict.
    """
    statistics = dict()
    return statistics
|
| 882 | + |
def validate_comments_inside_hunks(self, code_suggestions):
    """
    Validate that all committable comments are inside PR hunks - this is a must
    for committable comments in GitHub.

    The input list is deep-copied and never modified. For every suggestion that
    carries 'relevant_lines_start', 'relevant_lines_end' and 'original_suggestion':
      - if its line range lies fully inside a hunk of the matching diff file, it is
        left untouched;
      - if it is less than 10 lines away from a hunk, its line range is clamped into
        that hunk and the ```suggestion block in its body is replaced by a collapsible,
        non-committable diff of the proposed change;
      - otherwise an error is logged and the suggestion is left untouched.

    Side effects: sets `file.language` on every diff file and caches the parsed
    hunk ranges on the file object as `file.patches_range`.

    Args:
        code_suggestions: list of suggestion dicts (keys read here: 'relevant_file',
            'relevant_lines_start', 'relevant_lines_end', 'original_suggestion', 'body').

    Returns:
        A deep copy of `code_suggestions`, with out-of-hunk suggestions adjusted
        where possible.
    """
    code_suggestions_copy = copy.deepcopy(code_suggestions)
    diff_files = self.get_diff_files()
    hunk_header_re = re.compile(
        r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")

    # map file extensions to programming languages
    language_extension_map_org = get_settings().language_extension_map_org
    extension_to_language = {}
    for language, extensions in language_extension_map_org.items():
        for ext in extensions:
            extension_to_language[ext] = language
    for file in diff_files:
        extension_s = '.' + file.filename.rsplit('.', 1)[-1]
        file.language = extension_to_language.get(extension_s, "txt").lower()

    for suggestion in code_suggestions_copy:
        try:
            relevant_file_path = suggestion['relevant_file']
            for file in diff_files:
                if file.filename != relevant_file_path:
                    continue

                # generate on-demand (and cache on the file object) the new-side
                # line range covered by each hunk of the relevant file
                if not hasattr(file, 'patches_range'):
                    file.patches_range = []
                    # a file without a textual patch simply has no hunks
                    for line in (file.patch or "").splitlines():
                        if not line.startswith('@@'):
                            continue
                        match = hunk_header_re.match(line)
                        if match:
                            _, _, size2, _, start2 = extract_hunk_headers(match)
                            file.patches_range.append({'start': start2, 'end': start2 + size2 - 1})

                patches_range = file.patches_range
                comment_start_line = suggestion.get('relevant_lines_start', None)
                comment_end_line = suggestion.get('relevant_lines_end', None)
                original_suggestion = suggestion.get('original_suggestion', None)  # needed for diff code
                if not comment_start_line or not comment_end_line or not original_suggestion:
                    continue

                # find the hunk that contains the comment, or the closest one
                is_valid_hunk = False
                min_distance = float('inf')
                patch_range_min = None
                for patch_range in patches_range:
                    d1 = comment_start_line - patch_range['start']  # >= 0: starts at/after hunk start
                    d2 = patch_range['end'] - comment_end_line  # >= 0: ends at/before hunk end
                    if d1 >= 0 and d2 >= 0:  # found a valid hunk
                        is_valid_hunk = True
                        break
                    # NOTE(review): a comment that fully encloses a hunk (d1 < 0 and d2 < 0)
                    # is not considered a candidate here - confirm this is intended.
                    if d1 * d2 <= 0:  # comment sticks out on exactly one side of the hunk
                        d = max(abs(min(0, d1)), abs(min(0, d2)))
                        if d < min_distance:
                            patch_range_min = patch_range
                            min_distance = d

                if is_valid_hunk:
                    continue

                if min_distance >= 10:  # 10 lines - a reasonable distance to consider the comment inside the hunk
                    get_logger().error(f"Comment is not inside a valid hunk, "
                                       f"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}")
                    continue

                # make the suggestion non-committable, yet multi line.
                # Clamp both ends into the hunk; when the comment does not overlap the
                # hunk at all this collapses to the nearest hunk boundary line, so the
                # resulting range never has start > end.
                hunk_start = patch_range_min['start']
                hunk_end = patch_range_min['end']
                new_start = min(max(comment_start_line, hunk_start), hunk_end)
                new_end = max(min(comment_end_line, hunk_end), new_start)
                suggestion['relevant_lines_start'] = new_start
                suggestion['relevant_lines_end'] = new_end
                body = suggestion['body'].strip()

                # present new diff code in collapsible
                existing_code = original_suggestion['existing_code'].rstrip() + "\n"
                improved_code = original_suggestion['improved_code'].rstrip() + "\n"
                diff = difflib.unified_diff(existing_code.split('\n'),
                                            improved_code.split('\n'), n=999)
                patch_orig = "\n".join(diff)
                # the three header lines ('---', '+++', '@@') each end with their own
                # newline, so after joining they occupy 6 lines; dropping 5 and stripping
                # the leading newline leaves only the diff content
                patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
                diff_code = f"\n\n<details><summary>New proposed code:</summary>\n\n```diff\n{patch.rstrip()}\n```"
                # replace ```suggestion ... ``` with diff_code. A callable replacement is
                # used so that backslashes in the proposed code are inserted literally
                # instead of being parsed as regex escapes / group references.
                body = re.sub(r'```suggestion.*?```', lambda _match: diff_code, body, flags=re.DOTALL)
                body += "\n\n</details>"
                suggestion['body'] = body
                get_logger().info(f"Comment was moved to a valid hunk, "
                                  f"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}")
        except Exception as e:
            # best effort: a failure on one suggestion must not drop the others
            get_logger().error(f"Failed to process patch for committable comment, error: {e}")
    return code_suggestions_copy
| 978 | + |
0 commit comments