Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tr/review redesign #1011

Merged
merged 10 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pr_agent/algo/ai_handlers/litellm_ai_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def deployment_id(self):
return get_settings().get("OPENAI.DEPLOYMENT_ID", None)

@retry(
retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.Timeout)), # No retry on RateLimitError
retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.APITimeoutError)), # No retry on RateLimitError
stop=stop_after_attempt(OPENAI_RETRIES)
)
async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):
Expand Down Expand Up @@ -143,7 +143,7 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
get_logger().info(f"\nUser prompt:\n{user}")

response = await acompletion(**kwargs)
except (openai.APIError, openai.Timeout) as e:
except (openai.APIError, openai.APITimeoutError) as e:
get_logger().error("Error during OpenAI inference: ", e)
raise
except (openai.RateLimitError) as e:
Expand Down
197 changes: 133 additions & 64 deletions pr_agent/algo/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,18 @@ def get_setting(key: str) -> Any:
return global_settings.get(key, None)


def emphasize_header(text: str) -> str:
def emphasize_header(text: str, only_markdown=False) -> str:
try:
# Finding the position of the first occurrence of ": "
colon_position = text.find(": ")

# Splitting the string and wrapping the first part in <strong> tags
if colon_position != -1:
# Everything before the colon (inclusive) is wrapped in <strong> tags
transformed_string = "<strong>" + text[:colon_position + 1] + "</strong>" +'<br>' + text[colon_position + 1:]
if only_markdown:
transformed_string = f"**{text[:colon_position + 1]}**\n" + text[colon_position + 1:]
else:
transformed_string = "<strong>" + text[:colon_position + 1] + "</strong>" +'<br>' + text[colon_position + 1:]
mrT23 marked this conversation as resolved.
Show resolved Hide resolved
else:
# If there's no ": ", return the original string
transformed_string = text
Expand All @@ -67,8 +70,7 @@ def unique_strings(input_list: List[str]) -> List[str]:
seen.add(item)
return unique_list


def convert_to_markdown(output_data: dict, gfm_supported: bool = True, incremental_review=None) -> str:
def convert_to_markdown_v2(output_data: dict, gfm_supported: bool = True, incremental_review=None) -> str:
"""
Convert a dictionary of data into markdown format.
Args:
Expand Down Expand Up @@ -96,56 +98,99 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
else:
markdown_text += f"{PRReviewHeader.INCREMENTAL.value} 🔍\n\n"
markdown_text += f"⏮️ Review for commits since previous PR-Agent review {incremental_review}.\n\n"
# if not output_data or not output_data.get('review', {}):
# return ""

if gfm_supported:
markdown_text += "<table>\n"
# markdown_text += """<td> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Feedback&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</td> <td></td></tr>"""

if not output_data or not output_data.get('review', {}):
return ""

for key, value in output_data['review'].items():
if value is None or value == '' or value == {} or value == []:
if key.lower() != 'can_be_split':
continue
key_nice = key.replace('_', ' ').capitalize()
emoji = emojis.get(key_nice, "")
if gfm_supported:
if 'Estimated effort to review' in key_nice:
key_nice = 'Estimated&nbsp;effort&nbsp;to&nbsp;review [1-5]'
if 'security concerns' in key_nice.lower():
value = emphasize_header(value.strip())
markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
elif 'can be split' in key_nice.lower():
markdown_text += process_can_be_split(emoji, value)
elif 'key issues to review' in key_nice.lower():
value = value.strip()
issues = value.split('\n- ')
for i, _ in enumerate(issues):
issues[i] = issues[i].strip().strip('-').strip()
issues = unique_strings(issues) # remove duplicates
number_of_issues = len(issues)
if number_of_issues > 1:
markdown_text += f"<tr><td rowspan={number_of_issues}> {emoji}&nbsp;<strong>{key_nice}</strong></td>\n"
for i, issue in enumerate(issues):
if not issue:
continue
issue = emphasize_header(issue)
issue = replace_code_tags(issue)
if i == 0:
markdown_text += f"<td>\n{issue}</td></tr>\n"
else:
markdown_text += f"<tr>\n<td>\n{issue}</td></tr>\n"
if 'Estimated effort to review' in key_nice:
key_nice = 'Estimated effort to review'
value_int = int(value)
blue_bars = '🔵' * value_int
white_bars = '⚪' * (5 - value_int)
value = f"{value.strip()} {blue_bars}{white_bars}"
if gfm_supported:
markdown_text += f"<tr><td>"
markdown_text += f"{emoji}&nbsp;<strong>{key_nice}</strong>: {value}"
markdown_text += f"</td></tr>\n"
else:
markdown_text += f"### {emoji} {key_nice}: {value}\n\n"
elif 'relevant tests' in key_nice.lower():
value = value.strip().lower()
if gfm_supported:
markdown_text += f"<tr><td>"
if is_value_no(value):
markdown_text += f"{emoji}&nbsp;<strong>No relevant tests</strong>"
else:
value = emphasize_header(value.strip('-').strip())
value = replace_code_tags(value)
markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
markdown_text += f"{emoji}&nbsp;<strong>PR contains tests</strong>"
markdown_text += f"</td></tr>\n"
else:
markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
if gfm_supported:
markdown_text += f"<tr><td>"
if is_value_no(value):
markdown_text += f"{emoji}&nbsp;<strong>No relevant tests</strong>"
else:
markdown_text += f"{emoji}&nbsp;<strong>PR contains tests</strong>"
else:
if is_value_no(value):
markdown_text += f'### {emoji} No relevant tests\n\n'
else:
markdown_text += f"### PR contains tests\n\n"
elif 'security concerns' in key_nice.lower():
if gfm_supported:
markdown_text += f"<tr><td>"
if is_value_no(value):
markdown_text += f"{emoji}&nbsp;<strong>No security concerns identified</strong>"
else:
markdown_text += f"{emoji}&nbsp;<strong>Security concerns</strong><br><br>\n\n"
value = emphasize_header(value.strip())
markdown_text += f"{value}"
markdown_text += f"</td></tr>\n"
else:
if is_value_no(value):
markdown_text += f'### {emoji} No security concerns identified\n\n'
else:
markdown_text += f"### {emoji} Security concerns\n\n"
value = emphasize_header(value.strip())
markdown_text += f"{value}\n\n"
elif 'can be split' in key_nice.lower():
if gfm_supported:
markdown_text += f"<tr><td>"
markdown_text += process_can_be_split(emoji, value)
markdown_text += f"</td></tr>\n"
elif 'key issues to review' in key_nice.lower():
value = value.strip()
issues = value.split('\n- ')
for i, _ in enumerate(issues):
issues[i] = issues[i].strip().strip('-').strip()
issues = unique_strings(issues) # remove duplicates
if gfm_supported:
markdown_text += f"<tr><td>"
markdown_text += f"{emoji}&nbsp;<strong>{key_nice}</strong><br><br>\n\n"
else:
markdown_text += f"### {emoji} Key issues to review:\n\n"
for i, issue in enumerate(issues):
if not issue:
continue
issue = emphasize_header(issue, only_markdown=True)
markdown_text += f"{issue}\n\n"
if gfm_supported:
markdown_text += f"</td></tr>\n"
else:
if len(value.split()) > 1:
markdown_text += f"{emoji} **{key_nice}:**\n\n {value}\n\n"
if gfm_supported:
markdown_text += f"<tr><td>"
markdown_text += f"{emoji}&nbsp;<strong>{key_nice}</strong>: {value}"
markdown_text += f"</td></tr>\n"
else:
markdown_text += f"{emoji} **{key_nice}:** {value}\n\n"
markdown_text += f"### {emoji} {key_nice}: {value}\n\n"

if gfm_supported:
markdown_text += "</table>\n"

Expand All @@ -155,17 +200,15 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
markdown_text += f"<details><summary> <strong>Code feedback:</strong></summary>\n\n"
markdown_text += "<hr>"
else:
markdown_text += f"\n\n** Code feedback:**\n\n"
markdown_text += f"\n\n### Code feedback:\n\n"
for i, value in enumerate(output_data['code_feedback']):
if value is None or value == '' or value == {} or value == []:
continue
markdown_text += parse_code_suggestion(value, i, gfm_supported)+"\n\n"
if markdown_text.endswith('<hr>'):
markdown_text = markdown_text[:-4]
markdown_text= markdown_text[:-4]
if gfm_supported:
markdown_text += f"</details>"
#print(markdown_text)


return markdown_text

Expand All @@ -177,29 +220,47 @@ def process_can_be_split(emoji, value):
markdown_text = ""
if not value or isinstance(value, list) and len(value) == 1:
value = "No"
markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
# markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
# markdown_text += f"### {emoji} No multiple PR themes\n\n"
markdown_text += f"{emoji} <strong>No multiple PR themes</strong>\n\n"
else:
number_of_splits = len(value)
markdown_text += f"<tr><td rowspan={number_of_splits}> {emoji}&nbsp;<strong>{key_nice}</strong></td>\n"
markdown_text += f"{emoji} <strong>{key_nice}</strong><br><br>\n\n"
for i, split in enumerate(value):
title = split.get('title', '')
relevant_files = split.get('relevant_files', [])
if i == 0:
markdown_text += f"<td><details><summary>\nSub-PR theme:<br><strong>{title}</strong></summary>\n\n"
markdown_text += f"<hr>\n"
markdown_text += f"Relevant files:\n"
markdown_text += f"<ul>\n"
for file in relevant_files:
markdown_text += f"<li>{file}</li>\n"
markdown_text += f"</ul>\n\n</details></td></tr>\n"
else:
markdown_text += f"<tr>\n<td><details><summary>\nSub-PR theme:<br><strong>{title}</strong></summary>\n\n"
markdown_text += f"<hr>\n"
markdown_text += f"Relevant files:\n"
markdown_text += f"<ul>\n"
for file in relevant_files:
markdown_text += f"<li>{file}</li>\n"
markdown_text += f"</ul>\n\n</details></td></tr>\n"
markdown_text += f"<details><summary>\nSub-PR theme: <b>{title}</b></summary>\n\n"
markdown_text += f"___\n\nRelevant files:\n\n"
for file in relevant_files:
markdown_text += f"- {file}\n"
markdown_text += f"___\n\n"
markdown_text += f"</details>\n\n"

# markdown_text += f"#### Sub-PR theme: {title}\n\n"
# markdown_text += f"Relevant files:\n\n"
# for file in relevant_files:
# markdown_text += f"- {file}\n"
# markdown_text += "\n"
# number_of_splits = len(value)
# markdown_text += f"<tr><td rowspan={number_of_splits}> {emoji}&nbsp;<strong>{key_nice}</strong></td>\n"
# for i, split in enumerate(value):
# title = split.get('title', '')
# relevant_files = split.get('relevant_files', [])
# if i == 0:
# markdown_text += f"<td><details><summary>\nSub-PR theme:<br><strong>{title}</strong></summary>\n\n"
# markdown_text += f"<hr>\n"
# markdown_text += f"Relevant files:\n"
# markdown_text += f"<ul>\n"
# for file in relevant_files:
# markdown_text += f"<li>{file}</li>\n"
# markdown_text += f"</ul>\n\n</details></td></tr>\n"
# else:
# markdown_text += f"<tr>\n<td><details><summary>\nSub-PR theme:<br><strong>{title}</strong></summary>\n\n"
# markdown_text += f"<hr>\n"
# markdown_text += f"Relevant files:\n"
# markdown_text += f"<ul>\n"
# for file in relevant_files:
# markdown_text += f"<li>{file}</li>\n"
# markdown_text += f"</ul>\n\n</details></td></tr>\n"
except Exception as e:
get_logger().exception(f"Failed to process can be split: {e}")
return ""
Expand Down Expand Up @@ -778,3 +839,11 @@ def show_relevant_configurations(relevant_section: str) -> str:
markdown_text += "\n```"
markdown_text += "\n</details>\n"
return markdown_text

def is_value_no(value):
if value is None:
return True
value_str = str(value).strip().lower()
if value_str == 'no' or value_str == 'none' or value_str == 'false':
return True
return False
7 changes: 4 additions & 3 deletions pr_agent/settings/pr_code_suggestions_prompts.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,15 @@ __old hunk__
## file: 'src/file2.py'
...
======
- In this format, we separated each hunk of code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code that was removed.
- Code lines are prefixed symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code.
- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed.
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only.


Specific instructions for generating code suggestions:
- Provide up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
- The suggestions should focus on ways to improve the new code in the PR, meaning focusing on lines from '__new hunk__' sections, starting with '+'. Use the '__old hunk__' sections to understand the context of the code changes.
- The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers).
- Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.
- Don't suggest to add docstring, type hints, or comments, or to remove unused imports.
- Suggestions should not repeat code already present in the '__new hunk__' sections.
Expand Down
6 changes: 3 additions & 3 deletions pr_agent/tools/pr_reviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import PRReviewHeader, convert_to_markdown, github_action_output, load_yaml, ModelType, \
show_relevant_configurations
from pr_agent.algo.utils import github_action_output, load_yaml, ModelType, \
show_relevant_configurations, convert_to_markdown_v2, PRReviewHeader
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider, get_git_provider_with_context
from pr_agent.git_providers.git_provider import IncrementalPR, get_main_pr_language
Expand Down Expand Up @@ -230,7 +230,7 @@ def _prepare_pr_review(self) -> str:
f"{self.git_provider.incremental.first_new_commit_sha}"
incremental_review_markdown_text = f"Starting from commit {last_commit_url}"

markdown_text = convert_to_markdown(data, self.git_provider.is_supported("gfm_markdown"),
markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported("gfm_markdown"),
incremental_review_markdown_text)

# Add help text if gfm_markdown is supported
Expand Down
Loading