Codium-ai · mrT23 · Jul 3, 2024 · Jun 29, 2024 · Jun 29, 2024 · Jun 29, 2024
diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -99,7 +99,7 @@ def deployment_id(self):
  return get_settings().get("OPENAI.DEPLOYMENT_ID", None)
 
  @retry(
- retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.Timeout)), # No retry on RateLimitError
+ retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.APITimeoutError)), # No retry on RateLimitError
  stop=stop_after_attempt(OPENAI_RETRIES)
  )
  async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):
@@ -143,7 +143,7 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
  get_logger().info(f"\nUser prompt:\n{user}")
 
  response = await acompletion(**kwargs)
- except (openai.APIError, openai.Timeout) as e:
+ except (openai.APIError, openai.APITimeoutError) as e:
  get_logger().error("Error during OpenAI inference: ", e)
  raise
  except (openai.RateLimitError) as e:

diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
@@ -37,15 +37,18 @@ def get_setting(key: str) -> Any:
  return global_settings.get(key, None)
 
 
-def emphasize_header(text: str) -> str:
+def emphasize_header(text: str, only_markdown=False) -> str:
  try:
  # Finding the position of the first occurrence of ": "
  colon_position = text.find(": ")
 
  # Splitting the string and wrapping the first part in <strong> tags
  if colon_position != -1:
  # Everything before the colon (inclusive) is wrapped in <strong> tags
- transformed_string = "<strong>" + text[:colon_position + 1] + "</strong>" +'<br>' + text[colon_position + 1:]
+ if only_markdown:
+ transformed_string = f"**{text[:colon_position + 1]}**\n" + text[colon_position + 1:]
+ else:
+ transformed_string = "<strong>" + text[:colon_position + 1] + "</strong>" +'<br>' + text[colon_position + 1:]
  else:
  # If there's no ": ", return the original string
  transformed_string = text
@@ -67,8 +70,7 @@ def unique_strings(input_list: List[str]) -> List[str]:
  seen.add(item)
  return unique_list
 
-
-def convert_to_markdown(output_data: dict, gfm_supported: bool = True, incremental_review=None) -> str:
+def convert_to_markdown_v2(output_data: dict, gfm_supported: bool = True, incremental_review=None) -> str:
  """
  Convert a dictionary of data into markdown format.
  Args:
@@ -96,56 +98,99 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
  else:
  markdown_text += f"{PRReviewHeader.INCREMENTAL.value} 🔍\n\n"
  markdown_text += f"⏮️ Review for commits since previous PR-Agent review {incremental_review}.\n\n"
+ # if not output_data or not output_data.get('review', {}):
+ # return ""
+
  if gfm_supported:
  markdown_text += "<table>\n"
- # markdown_text += """<td> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Feedback&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</td> <td></td></tr>"""
-
- if not output_data or not output_data.get('review', {}):
- return ""
 
  for key, value in output_data['review'].items():
  if value is None or value == '' or value == {} or value == []:
  if key.lower() != 'can_be_split':
  continue
  key_nice = key.replace('_', ' ').capitalize()
  emoji = emojis.get(key_nice, "")
- if gfm_supported:
- if 'Estimated effort to review' in key_nice:
- key_nice = 'Estimated&nbsp;effort&nbsp;to&nbsp;review [1-5]'
- if 'security concerns' in key_nice.lower():
- value = emphasize_header(value.strip())
- markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
- elif 'can be split' in key_nice.lower():
- markdown_text += process_can_be_split(emoji, value)
- elif 'key issues to review' in key_nice.lower():
- value = value.strip()
- issues = value.split('\n- ')
- for i, _ in enumerate(issues):
- issues[i] = issues[i].strip().strip('-').strip()
- issues = unique_strings(issues) # remove duplicates
- number_of_issues = len(issues)
- if number_of_issues > 1:
- markdown_text += f"<tr><td rowspan={number_of_issues}> {emoji}&nbsp;<strong>{key_nice}</strong></td>\n"
- for i, issue in enumerate(issues):
- if not issue:
- continue
- issue = emphasize_header(issue)
- issue = replace_code_tags(issue)
- if i == 0:
- markdown_text += f"<td>\n{issue}</td></tr>\n"
- else:
- markdown_text += f"<tr>\n<td>\n{issue}</td></tr>\n"
+ if 'Estimated effort to review' in key_nice:
+ key_nice = 'Estimated effort to review'
+ value_int = int(value)
+ blue_bars = '🔵' * value_int
+ white_bars = '⚪' * (5 - value_int)
+ value = f"{value.strip()} {blue_bars}{white_bars}"
+ if gfm_supported:
+ markdown_text += f"<tr><td>"
+ markdown_text += f"{emoji}&nbsp;<strong>{key_nice}</strong>: {value}"
+ markdown_text += f"</td></tr>\n"
+ else:
+ markdown_text += f"### {emoji} {key_nice}: {value}\n\n"
+ elif 'relevant tests' in key_nice.lower():
+ value = value.strip().lower()
+ if gfm_supported:
+ markdown_text += f"<tr><td>"
+ if is_value_no(value):
+ markdown_text += f"{emoji}&nbsp;<strong>No relevant tests</strong>"
  else:
- value = emphasize_header(value.strip('-').strip())
- value = replace_code_tags(value)
- markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
+ markdown_text += f"{emoji}&nbsp;<strong>PR contains tests</strong>"
+ markdown_text += f"</td></tr>\n"
  else:
- markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
+ if gfm_supported:
+ markdown_text += f"<tr><td>"
+ if is_value_no(value):
+ markdown_text += f"{emoji}&nbsp;<strong>No relevant tests</strong>"
+ else:
+ markdown_text += f"{emoji}&nbsp;<strong>PR contains tests</strong>"
+ else:
+ if is_value_no(value):
+ markdown_text += f'### {emoji} No relevant tests\n\n'
+ else:
+ markdown_text += f"### PR contains tests\n\n"
+ elif 'security concerns' in key_nice.lower():
+ if gfm_supported:
+ markdown_text += f"<tr><td>"
+ if is_value_no(value):
+ markdown_text += f"{emoji}&nbsp;<strong>No security concerns identified</strong>"
+ else:
+ markdown_text += f"{emoji}&nbsp;<strong>Security concerns</strong><br><br>\n\n"
+ value = emphasize_header(value.strip())
+ markdown_text += f"{value}"
+ markdown_text += f"</td></tr>\n"
+ else:
+ if is_value_no(value):
+ markdown_text += f'### {emoji} No security concerns identified\n\n'
+ else:
+ markdown_text += f"### {emoji} Security concerns\n\n"
+ value = emphasize_header(value.strip())
+ markdown_text += f"{value}\n\n"
+ elif 'can be split' in key_nice.lower():
+ if gfm_supported:
+ markdown_text += f"<tr><td>"
+ markdown_text += process_can_be_split(emoji, value)
+ markdown_text += f"</td></tr>\n"
+ elif 'key issues to review' in key_nice.lower():
+ value = value.strip()
+ issues = value.split('\n- ')
+ for i, _ in enumerate(issues):
+ issues[i] = issues[i].strip().strip('-').strip()
+ issues = unique_strings(issues) # remove duplicates
+ if gfm_supported:
+ markdown_text += f"<tr><td>"
+ markdown_text += f"{emoji}&nbsp;<strong>{key_nice}</strong><br><br>\n\n"
+ else:
+ markdown_text += f"### {emoji} Key issues to review:\n\n"
+ for i, issue in enumerate(issues):
+ if not issue:
+ continue
+ issue = emphasize_header(issue, only_markdown=True)
+ markdown_text += f"{issue}\n\n"
+ if gfm_supported:
+ markdown_text += f"</td></tr>\n"
  else:
- if len(value.split()) > 1:
- markdown_text += f"{emoji} **{key_nice}:**\n\n {value}\n\n"
+ if gfm_supported:
+ markdown_text += f"<tr><td>"
+ markdown_text += f"{emoji}&nbsp;<strong>{key_nice}</strong>: {value}"
+ markdown_text += f"</td></tr>\n"
  else:
- markdown_text += f"{emoji} **{key_nice}:** {value}\n\n"
+ markdown_text += f"### {emoji} {key_nice}: {value}\n\n"
+
  if gfm_supported:
  markdown_text += "</table>\n"
 
@@ -155,17 +200,15 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
  markdown_text += f"<details><summary> <strong>Code feedback:</strong></summary>\n\n"
  markdown_text += "<hr>"
  else:
- markdown_text += f"\n\n** Code feedback:**\n\n"
+ markdown_text += f"\n\n### Code feedback:\n\n"
  for i, value in enumerate(output_data['code_feedback']):
  if value is None or value == '' or value == {} or value == []:
  continue
  markdown_text += parse_code_suggestion(value, i, gfm_supported)+"\n\n"
  if markdown_text.endswith('<hr>'):
- markdown_text = markdown_text[:-4]
+ markdown_text= markdown_text[:-4]
  if gfm_supported:
  markdown_text += f"</details>"
- #print(markdown_text)
-
 
  return markdown_text
 
@@ -177,29 +220,47 @@ def process_can_be_split(emoji, value):
  markdown_text = ""
  if not value or isinstance(value, list) and len(value) == 1:
  value = "No"
- markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
+ # markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
+ # markdown_text += f"### {emoji} No multiple PR themes\n\n"
+ markdown_text += f"{emoji} <strong>No multiple PR themes</strong>\n\n"
  else:
- number_of_splits = len(value)
- markdown_text += f"<tr><td rowspan={number_of_splits}> {emoji}&nbsp;<strong>{key_nice}</strong></td>\n"
+ markdown_text += f"{emoji} <strong>{key_nice}</strong><br><br>\n\n"
  for i, split in enumerate(value):
  title = split.get('title', '')
  relevant_files = split.get('relevant_files', [])
- if i == 0:
- markdown_text += f"<td><details><summary>\nSub-PR theme:<br><strong>{title}</strong></summary>\n\n"
- markdown_text += f"<hr>\n"
- markdown_text += f"Relevant files:\n"
- markdown_text += f"<ul>\n"
- for file in relevant_files:
- markdown_text += f"<li>{file}</li>\n"
- markdown_text += f"</ul>\n\n</details></td></tr>\n"
- else:
- markdown_text += f"<tr>\n<td><details><summary>\nSub-PR theme:<br><strong>{title}</strong></summary>\n\n"
- markdown_text += f"<hr>\n"
- markdown_text += f"Relevant files:\n"
- markdown_text += f"<ul>\n"
- for file in relevant_files:
- markdown_text += f"<li>{file}</li>\n"
- markdown_text += f"</ul>\n\n</details></td></tr>\n"
+ markdown_text += f"<details><summary>\nSub-PR theme: <b>{title}</b></summary>\n\n"
+ markdown_text += f"___\n\nRelevant files:\n\n"
+ for file in relevant_files:
+ markdown_text += f"- {file}\n"
+ markdown_text += f"___\n\n"
+ markdown_text += f"</details>\n\n"
+
+ # markdown_text += f"#### Sub-PR theme: {title}\n\n"
+ # markdown_text += f"Relevant files:\n\n"
+ # for file in relevant_files:
+ # markdown_text += f"- {file}\n"
+ # markdown_text += "\n"
+ # number_of_splits = len(value)
+ # markdown_text += f"<tr><td rowspan={number_of_splits}> {emoji}&nbsp;<strong>{key_nice}</strong></td>\n"
+ # for i, split in enumerate(value):
+ # title = split.get('title', '')
+ # relevant_files = split.get('relevant_files', [])
+ # if i == 0:
+ # markdown_text += f"<td><details><summary>\nSub-PR theme:<br><strong>{title}</strong></summary>\n\n"
+ # markdown_text += f"<hr>\n"
+ # markdown_text += f"Relevant files:\n"
+ # markdown_text += f"<ul>\n"
+ # for file in relevant_files:
+ # markdown_text += f"<li>{file}</li>\n"
+ # markdown_text += f"</ul>\n\n</details></td></tr>\n"
+ # else:
+ # markdown_text += f"<tr>\n<td><details><summary>\nSub-PR theme:<br><strong>{title}</strong></summary>\n\n"
+ # markdown_text += f"<hr>\n"
+ # markdown_text += f"Relevant files:\n"
+ # markdown_text += f"<ul>\n"
+ # for file in relevant_files:
+ # markdown_text += f"<li>{file}</li>\n"
+ # markdown_text += f"</ul>\n\n</details></td></tr>\n"
  except Exception as e:
  get_logger().exception(f"Failed to process can be split: {e}")
  return ""
@@ -778,3 +839,11 @@ def show_relevant_configurations(relevant_section: str) -> str:
  markdown_text += "\n```"
  markdown_text += "\n</details>\n"
  return markdown_text
+
+def is_value_no(value):
+ if value is None:
+ return True
+ value_str = str(value).strip().lower()
+ if value_str == 'no' or value_str == 'none' or value_str == 'false':
+ return True
+ return False
diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml
@@ -27,14 +27,15 @@ __old hunk__
 ## file: 'src/file2.py'
 ...
 ======
-- In this format, we separated each hunk of code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code that was removed.
-- Code lines are prefixed symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code.
+- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed.
 - We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
+- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
+Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only.
 
 
 Specific instructions for generating code suggestions:
 - Provide up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
-- The suggestions should focus on ways to improve the new code in the PR, meaning focusing on lines from '__new hunk__' sections, starting with '+'. Use the '__old hunk__' sections to understand the context of the code changes.
+- The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers).
 - Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.
 - Don't suggest to add docstring, type hints, or comments, or to remove unused imports.
 - Suggestions should not repeat code already present in the '__new hunk__' sections.

diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
@@ -8,8 +8,8 @@
 from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
 from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
 from pr_agent.algo.token_handler import TokenHandler
-from pr_agent.algo.utils import PRReviewHeader, convert_to_markdown, github_action_output, load_yaml, ModelType, \
- show_relevant_configurations
+from pr_agent.algo.utils import github_action_output, load_yaml, ModelType, \
+ show_relevant_configurations, convert_to_markdown_v2, PRReviewHeader
 from pr_agent.config_loader import get_settings
 from pr_agent.git_providers import get_git_provider, get_git_provider_with_context
 from pr_agent.git_providers.git_provider import IncrementalPR, get_main_pr_language
@@ -230,7 +230,7 @@ def _prepare_pr_review(self) -> str:
  f"{self.git_provider.incremental.first_new_commit_sha}"
  incremental_review_markdown_text = f"Starting from commit {last_commit_url}"
 
- markdown_text = convert_to_markdown(data, self.git_provider.is_supported("gfm_markdown"),
+ markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported("gfm_markdown"),
  incremental_review_markdown_text)
 
  # Add help text if gfm_markdown is supported