From 6565556e0162cb737d740338da15179ad6d34f6c Mon Sep 17 00:00:00 2001 From: mrT23 Date: Mon, 29 Jan 2024 20:51:24 +0200 Subject: [PATCH 1/3] feat: Add 'language' field to CodeSuggestion, FileDescription, and ReviewerPrompt models in settings files --- pr_agent/settings/pr_code_suggestions_prompts.toml | 8 +++----- pr_agent/settings/pr_description_prompts.toml | 6 +++--- pr_agent/settings/pr_reviewer_prompts.toml | 9 +++++---- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 2fb224c75..ee3110544 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -51,6 +51,7 @@ The output must be a YAML object equivalent to type $PRCodeSuggestions, accordin ===== class CodeSuggestion(BaseModel): relevant_file: str = Field(description="the relevant file full path") + language: str = Field(description="the code language of the relevant file") suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR") {%- if summarize_mode %} existing_code: str = Field(description="a short code snippet from a '__new hunk__' section to illustrate the relevant existing code. Don't show the line numbers.") @@ -74,6 +75,8 @@ Example output: code_suggestions: - relevant_file: |- src/file1.py + language: |- + python suggestion_content: |- Add a docstring to func1() {%- if summarize_mode %} @@ -105,11 +108,6 @@ user="""PR Info: Title: '{{title}}' -{%- if language %} - -Main PR language: '{{ language }}' -{%- endif %} - The PR Diff: ====== diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index b9c5ce394..b36b01834 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -39,6 +39,7 @@ class PRType(str, Enum): Class FileDescription(BaseModel): filename: str = Field(description="the relevant file full path") + language: str = Field(description="the relevant file language") changes_summary: str = Field(description="concise summary of the changes in the relevant file, in bullet points (1-4 bullet points).") changes_title: str = Field(description="an informative title for the changes in the files, describing its main theme (5-10 words).") label: str = Field(description="a single semantic label that represents a type of code changes that occurred in the File. Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...") @@ -67,6 +68,8 @@ type: pr_files: - filename: | ... + language: | + ... changes_summary: | ... changes_title: | @@ -104,10 +107,7 @@ Previous description: {%- endif %} Branch: '{{branch}}' -{%- if language %} -Main PR language: '{{ language }}' -{%- endif %} {%- if commit_messages_str %} Commit messages: diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 736fb2472..5312832c6 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -115,6 +115,9 @@ PR Feedback: relevant file: type: string description: the relevant file full path + language: + type: string + description: the language of the relevant file suggestion: type: string description: |- @@ -166,6 +169,8 @@ PR Feedback: Code feedback: - relevant file: |- directory/xxx.py + language: |- + python suggestion: |- xxx [important] relevant line: |- @@ -195,10 +200,6 @@ Description: ====== {%- endif %} -{%- if language %} - -Main PR language: '{{ language }}' -{%- endif %} {%- if commit_messages_str %} Commit messages: From 0d867797990324964251c754d4aae38115d0fddc Mon Sep 17 00:00:00 2001 From: mrT23 Date: Mon, 29 Jan 2024 21:52:54 +0200 Subject: [PATCH 2/3] feat: Improve patch formatting and handle empty data in pr_code_suggestions.py --- pr_agent/algo/git_patch_processing.py | 10 +++++----- pr_agent/algo/pr_processing.py | 4 ++-- pr_agent/settings/pr_add_docs.toml | 5 ++--- pr_agent/settings/pr_code_suggestions_prompts.toml | 5 ++--- pr_agent/settings/pr_reviewer_prompts.toml | 5 ++--- pr_agent/tools/pr_code_suggestions.py | 2 ++ 6 files changed, 15 insertions(+), 16 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 480387fa9..10d140b06 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -181,7 +181,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str: ... """ - patch_with_lines_str = f"\n\n## {file.filename}\n" + patch_with_lines_str = f"\n\n## file: '{file.filename.strip()}'\n" patch_lines = patch.splitlines() RE_HUNK_HEADER = re.compile( r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") @@ -202,11 +202,11 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str: if new_content_lines: if prev_header_line: patch_with_lines_str += f'\n{prev_header_line}\n' - patch_with_lines_str += '__new hunk__\n' + patch_with_lines_str = patch_with_lines_str.rstrip()+'\n__new hunk__\n' for i, line_new in enumerate(new_content_lines): patch_with_lines_str += f"{start2 + i} {line_new}\n" if old_content_lines: - patch_with_lines_str += '__old hunk__\n' + patch_with_lines_str = patch_with_lines_str.rstrip()+'\n__old hunk__\n' for line_old in old_content_lines: patch_with_lines_str += f"{line_old}\n" new_content_lines = [] @@ -236,11 +236,11 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str: if match and new_content_lines: if new_content_lines: patch_with_lines_str += f'\n{header_line}\n' - patch_with_lines_str += '\n__new hunk__\n' + patch_with_lines_str = patch_with_lines_str.rstrip()+ '\n__new hunk__\n' for i, line_new in enumerate(new_content_lines): patch_with_lines_str += f"{start2 + i} {line_new}\n" if old_content_lines: - patch_with_lines_str += '\n__old hunk__\n' + patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n' for line_old in old_content_lines: patch_with_lines_str += f"{line_old}\n" diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index ecec30151..f4ffae89e 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -209,9 +209,9 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo if patch: if not convert_hunks_to_line_numbers: - patch_final = f"## {file.filename}\n\n{patch}\n" + patch_final = f"\n\n## file: '{file.filename.strip()}\n\n{patch.strip()}\n'" else: - patch_final = patch + patch_final = "\n\n" + patch.strip() patches.append(patch_final) total_tokens += token_handler.count_tokens(patch_final) if get_settings().config.verbosity_level >= 2: diff --git a/pr_agent/settings/pr_add_docs.toml b/pr_agent/settings/pr_add_docs.toml index 44e9f0910..cc33eee5b 100644 --- a/pr_agent/settings/pr_add_docs.toml +++ b/pr_agent/settings/pr_add_docs.toml @@ -5,7 +5,7 @@ Your task is to generate {{ docs_for_language }} for code components in the PR D Example for the PR Diff format: ====== -## src/file1.py +## file: 'src/file1.py' @@ -12,3 +12,4 @@ def func1(): __new hunk__ @@ -18,7 +18,6 @@ __old hunk__ -code line that was removed in the PR code line2 that remained unchanged in the PR - @@ ... @@ def func2(): __new hunk__ ... @@ -26,7 +25,7 @@ __old hunk__ ... -## src/file2.py +## file: 'src/file2.py' ... ====== diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index ee3110544..71d923505 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -4,7 +4,7 @@ Your task is to provide meaningful and actionable code suggestions, to improve t Example for the PR Diff format: ====== -## src/file1.py +## file: 'src/file1.py' @@ ... @@ def func1(): __new hunk__ @@ -16,7 +16,6 @@ __old hunk__ -old code line2 that was removed in the PR code line3 that remained unchanged in the PR - @@ ... @@ def func2(): __new hunk__ ... @@ -24,7 +23,7 @@ __old hunk__ ... -## src/file2.py +## file: 'src/file2.py' ... ====== diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 5312832c6..427cd9743 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -5,7 +5,7 @@ The review should focus on new code added in the PR diff (lines starting with '+ Example PR Diff: ====== -## src/file1.py +## file: 'src/file1.py' @@ -12,5 +12,5 @@ def func1(): code line 1 that remained unchanged in the PR @@ -14,12 +14,11 @@ code line 2 that remained unchanged in the PR +code line added in the PR code line 3 that remained unchanged in the PR - @@ ... @@ def func2(): ... -## src/file2.py +## file: 'src/file2.py' ... ====== diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index 1456f9a61..08074899e 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -253,6 +253,8 @@ async def rank_suggestions(self, data: List) -> List: """ suggestion_list = [] + if not data: + return suggestion_list for suggestion in data: suggestion_list.append(suggestion) data_sorted = [[]] * len(suggestion_list) From 15c8fe94bb29963cf12e77c046834c0c547da810 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Mon, 29 Jan 2024 22:00:11 +0200 Subject: [PATCH 3/3] feat: Improve patch formatting and handle empty data in pr_code_suggestions.py --- pr_agent/algo/pr_processing.py | 7 +++++++ pr_agent/tools/pr_code_suggestions.py | 5 ++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index f4ffae89e..1e482dbfb 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -375,6 +375,13 @@ def get_pr_multi_diffs(git_provider: GitProvider, for lang in pr_languages: sorted_files.extend(sorted(lang['files'], key=lambda x: x.tokens, reverse=True)) + + # try first a single run with standard diff string, with patch extension, and no deletions + patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff( + pr_languages, token_handler, add_line_numbers_to_hunks=True) + if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model): + return ["\n".join(patches_extended)] + patches = [] final_diff_list = [] total_tokens = token_handler.prompt_tokens diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index 08074899e..381c02a65 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -226,7 +226,7 @@ async def _prepare_prediction_extended(self, model: str) -> dict: for i, patches_diff in enumerate(patches_diff_list): get_logger().info(f"Processing chunk {i + 1} of {len(patches_diff_list)}") self.patches_diff = patches_diff - prediction = await self._get_prediction(model) + prediction = await self._get_prediction(model) # toDo: parallelize prediction_list.append(prediction) self.prediction_list = prediction_list @@ -259,6 +259,9 @@ async def rank_suggestions(self, data: List) -> List: suggestion_list.append(suggestion) data_sorted = [[]] * len(suggestion_list) + if len(suggestion_list ) == 1: + return suggestion_list + try: suggestion_str = "" for i, suggestion in enumerate(suggestion_list):