Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancements in Patch Formatting and Code Suggestions Handling #630

Merged
merged 3 commits into from
Jan 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions pr_agent/algo/git_patch_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
...
"""

patch_with_lines_str = f"\n\n## {file.filename}\n"
patch_with_lines_str = f"\n\n## file: '{file.filename.strip()}'\n"
patch_lines = patch.splitlines()
RE_HUNK_HEADER = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
Expand All @@ -202,11 +202,11 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
if new_content_lines:
if prev_header_line:
patch_with_lines_str += f'\n{prev_header_line}\n'
patch_with_lines_str += '__new hunk__\n'
patch_with_lines_str = patch_with_lines_str.rstrip()+'\n__new hunk__\n'
for i, line_new in enumerate(new_content_lines):
patch_with_lines_str += f"{start2 + i} {line_new}\n"
if old_content_lines:
patch_with_lines_str += '__old hunk__\n'
patch_with_lines_str = patch_with_lines_str.rstrip()+'\n__old hunk__\n'
for line_old in old_content_lines:
patch_with_lines_str += f"{line_old}\n"
new_content_lines = []
Expand Down Expand Up @@ -236,11 +236,11 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
if match and new_content_lines:
if new_content_lines:
patch_with_lines_str += f'\n{header_line}\n'
patch_with_lines_str += '\n__new hunk__\n'
patch_with_lines_str = patch_with_lines_str.rstrip()+ '\n__new hunk__\n'
for i, line_new in enumerate(new_content_lines):
patch_with_lines_str += f"{start2 + i} {line_new}\n"
if old_content_lines:
patch_with_lines_str += '\n__old hunk__\n'
patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n'
for line_old in old_content_lines:
patch_with_lines_str += f"{line_old}\n"

Expand Down
11 changes: 9 additions & 2 deletions pr_agent/algo/pr_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,9 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo

if patch:
if not convert_hunks_to_line_numbers:
patch_final = f"## {file.filename}\n\n{patch}\n"
patch_final = f"\n\n## file: '{file.filename.strip()}\n\n{patch.strip()}\n'"
else:
patch_final = patch
patch_final = "\n\n" + patch.strip()
patches.append(patch_final)
total_tokens += token_handler.count_tokens(patch_final)
if get_settings().config.verbosity_level >= 2:
Expand Down Expand Up @@ -375,6 +375,13 @@ def get_pr_multi_diffs(git_provider: GitProvider,
for lang in pr_languages:
sorted_files.extend(sorted(lang['files'], key=lambda x: x.tokens, reverse=True))


# first try a single run with the standard diff string, with patch extension and no deletions
patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff(
pr_languages, token_handler, add_line_numbers_to_hunks=True)
if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model):
return ["\n".join(patches_extended)]

patches = []
final_diff_list = []
total_tokens = token_handler.prompt_tokens
Expand Down
5 changes: 2 additions & 3 deletions pr_agent/settings/pr_add_docs.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Your task is to generate {{ docs_for_language }} for code components in the PR D

Example for the PR Diff format:
======
## src/file1.py
## file: 'src/file1.py'

@@ -12,3 +12,4 @@ def func1():
__new hunk__
Expand All @@ -18,15 +18,14 @@ __old hunk__
-code line that was removed in the PR
code line2 that remained unchanged in the PR


@@ ... @@ def func2():
__new hunk__
...
__old hunk__
...


## src/file2.py
## file: 'src/file2.py'
...
======

Expand Down
13 changes: 5 additions & 8 deletions pr_agent/settings/pr_code_suggestions_prompts.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Your task is to provide meaningful and actionable code suggestions, to improve t

Example for the PR Diff format:
======
## src/file1.py
## file: 'src/file1.py'

@@ ... @@ def func1():
__new hunk__
Expand All @@ -16,15 +16,14 @@ __old hunk__
-old code line2 that was removed in the PR
code line3 that remained unchanged in the PR


@@ ... @@ def func2():
__new hunk__
...
__old hunk__
...


## src/file2.py
## file: 'src/file2.py'
...
======

Expand All @@ -51,6 +50,7 @@ The output must be a YAML object equivalent to type $PRCodeSuggestions, accordin
=====
class CodeSuggestion(BaseModel):
relevant_file: str = Field(description="the relevant file full path")
language: str = Field(description="the code language of the relevant file")
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR")
{%- if summarize_mode %}
existing_code: str = Field(description="a short code snippet from a '__new hunk__' section to illustrate the relevant existing code. Don't show the line numbers.")
Expand All @@ -74,6 +74,8 @@ Example output:
code_suggestions:
- relevant_file: |-
src/file1.py
language: |-
python
suggestion_content: |-
Add a docstring to func1()
{%- if summarize_mode %}
Expand Down Expand Up @@ -105,11 +107,6 @@ user="""PR Info:

Title: '{{title}}'

{%- if language %}

Main PR language: '{{ language }}'
{%- endif %}


The PR Diff:
======
Expand Down
6 changes: 3 additions & 3 deletions pr_agent/settings/pr_description_prompts.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class PRType(str, Enum):

class FileDescription(BaseModel):
filename: str = Field(description="the relevant file full path")
language: str = Field(description="the relevant file language")
changes_summary: str = Field(description="concise summary of the changes in the relevant file, in bullet points (1-4 bullet points).")
changes_title: str = Field(description="an informative title for the changes in the files, describing its main theme (5-10 words).")
label: str = Field(description="a single semantic label that represents a type of code changes that occurred in the File. Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...")
Expand Down Expand Up @@ -67,6 +68,8 @@ type:
pr_files:
- filename: |
...
language: |
...
changes_summary: |
...
changes_title: |
Expand Down Expand Up @@ -104,10 +107,7 @@ Previous description:
{%- endif %}

Branch: '{{branch}}'
{%- if language %}

Main PR language: '{{ language }}'
{%- endif %}
{%- if commit_messages_str %}

Commit messages:
Expand Down
14 changes: 7 additions & 7 deletions pr_agent/settings/pr_reviewer_prompts.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ The review should focus on new code added in the PR diff (lines starting with '+

Example PR Diff:
======
## src/file1.py
## file: 'src/file1.py'

@@ -12,5 +12,5 @@ def func1():
code line 1 that remained unchanged in the PR
Expand All @@ -14,12 +14,11 @@ code line 2 that remained unchanged in the PR
+code line added in the PR
code line 3 that remained unchanged in the PR


@@ ... @@ def func2():
...


## src/file2.py
## file: 'src/file2.py'
...
======

Expand Down Expand Up @@ -115,6 +114,9 @@ PR Feedback:
relevant file:
type: string
description: the relevant file full path
language:
type: string
description: the language of the relevant file
suggestion:
type: string
description: |-
Expand Down Expand Up @@ -166,6 +168,8 @@ PR Feedback:
Code feedback:
- relevant file: |-
directory/xxx.py
language: |-
python
suggestion: |-
xxx [important]
relevant line: |-
Expand Down Expand Up @@ -195,10 +199,6 @@ Description:
======
{%- endif %}

{%- if language %}

Main PR language: '{{ language }}'
{%- endif %}
{%- if commit_messages_str %}

Commit messages:
Expand Down
7 changes: 6 additions & 1 deletion pr_agent/tools/pr_code_suggestions.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ async def _prepare_prediction_extended(self, model: str) -> dict:
for i, patches_diff in enumerate(patches_diff_list):
get_logger().info(f"Processing chunk {i + 1} of {len(patches_diff_list)}")
self.patches_diff = patches_diff
prediction = await self._get_prediction(model)
prediction = await self._get_prediction(model) # toDo: parallelize
prediction_list.append(prediction)
self.prediction_list = prediction_list

Expand All @@ -253,10 +253,15 @@ async def rank_suggestions(self, data: List) -> List:
"""

suggestion_list = []
if not data:
return suggestion_list
for suggestion in data:
suggestion_list.append(suggestion)
data_sorted = [[]] * len(suggestion_list)

if len(suggestion_list ) == 1:
return suggestion_list

try:
suggestion_str = ""
for i, suggestion in enumerate(suggestion_list):
Expand Down
Loading