Tr/self reflect #898

Merged: 4 commits, May 13, 2024
16 changes: 1 addition & 15 deletions pr_agent/settings/pr_code_suggestions_prompts.toml
@@ -40,7 +40,7 @@ Specific instructions for generating code suggestions:
- Suggestions should not repeat code already present in the '__new hunk__' sections.
- Provide the exact line numbers range (inclusive) for each suggestion. Use the line numbers from the '__new hunk__' sections.
- When quoting variables or names from the code, use backticks (`) instead of single quote (').
- Be aware that you are reviewing a PR code diff, and that the entire codebase is not available for you as context. Hence, don't suggest changes that require knowledge of the entire codebase.
- Take into account that you are reviewing a PR code diff, and that the entire codebase is not available for you as context. Hence, avoid suggestions that might conflict with unseen parts of the codebase.


{%- if extra_instructions %}
@@ -59,14 +59,9 @@ class CodeSuggestion(BaseModel):
relevant_file: str = Field(description="the relevant file full path")
language: str = Field(description="the code language of the relevant file")
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR")
{%- if not commitable_code_suggestions_mode %}
existing_code: str = Field(description="a short code snippet from a '__new hunk__' section to illustrate the relevant existing code. Don't show the line numbers.")
improved_code: str = Field(description="a short code snippet to illustrate the improved code, after applying the suggestion.")
one_sentence_summary:str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.")
{%- else %}
existing_code: str = Field(description="a code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be contiguous, correctly formatted and indented, and without line numbers. Use abbreviations if needed")
improved_code: str = Field(description="If relevant, a new code snippet, that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers". Retrun '...' if not applicable")
{%- endif %}
relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
label: str = Field(description="a single label for the suggestion, to help the user understand the suggestion type. For example: 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', etc. Other labels are also allowed")
@@ -85,7 +80,6 @@ code_suggestions:
python
suggestion_content: |
...
{%- if not commitable_code_suggestions_mode %}
existing_code: |
...
improved_code: |
@@ -94,14 +88,6 @@
...
relevant_lines_start: 12
relevant_lines_end: 13
{%- else %}
existing_code: |
...
relevant_lines_start: 12
relevant_lines_end: 13
improved_code: |
...
{%- endif %}
label: |
...
```
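Note: with the `commitable_code_suggestions_mode` branches removed, the prompt describes a single unified output schema. A minimal sketch of the resulting model, reconstructed from the prompt text above (field descriptions are abbreviated, not copied verbatim):

```python
from pydantic import BaseModel, Field

class CodeSuggestion(BaseModel):
    relevant_file: str = Field(description="the relevant file full path")
    language: str = Field(description="the code language of the relevant file")
    suggestion_content: str = Field(description="an actionable suggestion for the new code introduced in the PR")
    existing_code: str = Field(description="a short snippet from a '__new hunk__' section, without line numbers")
    improved_code: str = Field(description="a short snippet illustrating the code after applying the suggestion")
    one_sentence_summary: str = Field(description="a one-sentence summary of the suggestion, focused on the 'what'")
    relevant_lines_start: int = Field(description="first relevant line in the '__new hunk__' section (inclusive)")
    relevant_lines_end: int = Field(description="last relevant line in the '__new hunk__' section (inclusive)")
    label: str = Field(description="a single label, e.g. 'security', 'possible bug', 'performance'")
```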
6 changes: 0 additions & 6 deletions pr_agent/settings/pr_code_suggestions_reflect_prompts.toml
@@ -46,9 +46,7 @@ __old hunk__
The output must be a YAML object equivalent to type $PRCodeSuggestionsFeedback, according to the following Pydantic definitions:
=====
class CodeSuggestionFeedback(BaseModel):
{%- if not commitable_code_suggestions_mode %}
suggestion_summary: str = Field(description="repeated from the input")
{%- endif %}
relevant_file: str = Field(description="repeated from the input")
suggestion_score: int = Field(description="The actual output - the score of the suggestion, from 0 to 10. Give 0 if the suggestion is plain wrong. Otherwise, give a score from 1 to 10 (inclusive), where 1 is the lowest and 10 is the highest.")
why: str = Field(description="Short and concise explanation of why the suggestion received the score (one to two sentences).")
@@ -61,13 +59,9 @@ class PRCodeSuggestionsFeedback(BaseModel):
Example output:
```yaml
code_suggestions:
{%- if not commitable_code_suggestions_mode %}
- suggestion_content: |
Use a more descriptive variable name here
relevant_file: "src/file1.py"
{%- else %}
- relevant_file: "src/file1.py"
{%- endif %}
suggestion_score: 6
why: |
The variable name 't' is not descriptive enough
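Note: the reflection schema likewise loses its mode branch, so `suggestion_summary` is always expected. A sketch of the resulting feedback models, reconstructed from this hunk (the list field on `PRCodeSuggestionsFeedback` is not visible in the hunk and is assumed here):

```python
from typing import List
from pydantic import BaseModel, Field

class CodeSuggestionFeedback(BaseModel):
    suggestion_summary: str = Field(description="repeated from the input")
    relevant_file: str = Field(description="repeated from the input")
    suggestion_score: int = Field(description="score from 0 to 10; 0 if the suggestion is plain wrong")
    why: str = Field(description="one- or two-sentence explanation of the score")

class PRCodeSuggestionsFeedback(BaseModel):
    # Assumed field: one feedback entry per input suggestion.
    code_suggestions: List[CodeSuggestionFeedback]
```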
56 changes: 31 additions & 25 deletions pr_agent/tools/pr_code_suggestions.py
@@ -57,7 +57,6 @@ def __init__(self, pr_url: str, cli_mode=False, args: list = None,
"language": self.main_language,
"diff": "", # empty diff for initial calculation
"num_code_suggestions": num_code_suggestions,
"commitable_code_suggestions_mode": get_settings().pr_code_suggestions.commitable_code_suggestions,
"extra_instructions": get_settings().pr_code_suggestions.extra_instructions,
"commit_messages_str": self.git_provider.get_commit_messages(),
}
@@ -106,7 +105,8 @@ async def run(self):

if get_settings().config.publish_output:
self.git_provider.remove_initial_comment()
if (not get_settings().pr_code_suggestions.commitable_code_suggestions) and self.git_provider.is_supported("gfm_markdown"):
if ((not get_settings().pr_code_suggestions.commitable_code_suggestions) and
self.git_provider.is_supported("gfm_markdown")):

# generate summarized suggestions
pr_body = self.generate_summarized_suggestions(data)
@@ -226,26 +226,29 @@ def _prepare_pr_code_suggestions(self, predictions: str) -> Dict:
one_sentence_summary_list = []
for i, suggestion in enumerate(data['code_suggestions']):
try:
if not get_settings().pr_code_suggestions.commitable_code_suggestions:
if not suggestion or 'one_sentence_summary' not in suggestion or 'label' not in suggestion or 'relevant_file' not in suggestion:
get_logger().debug(f"Skipping suggestion {i + 1}, because it is invalid: {suggestion}")
continue
if (not suggestion or 'one_sentence_summary' not in suggestion or
'label' not in suggestion or 'relevant_file' not in suggestion):
get_logger().debug(f"Skipping suggestion {i + 1}, because it is invalid: {suggestion}")
continue

if suggestion['one_sentence_summary'] in one_sentence_summary_list:
get_logger().debug(f"Skipping suggestion {i + 1}, because it is a duplicate: {suggestion}")
continue
if suggestion['one_sentence_summary'] in one_sentence_summary_list:
get_logger().debug(f"Skipping suggestion {i + 1}, because it is a duplicate: {suggestion}")
continue

if 'const' in suggestion['suggestion_content'] and 'instead' in suggestion['suggestion_content'] and 'let' in suggestion['suggestion_content']:
get_logger().debug(f"Skipping suggestion {i + 1}, because it uses 'const instead let': {suggestion}")
continue

if ('existing_code' in suggestion) and ('improved_code' in suggestion):
if suggestion['existing_code'] == suggestion['improved_code']:
get_logger().debug(f"skipping improved suggestion {i + 1}, because equal to existing code: {suggestion['existing_code']}")
suggestion['existing_code'] = ""
get_logger().debug(
f"edited improved suggestion {i + 1}, because equal to existing code: {suggestion['existing_code']}")
if get_settings().pr_code_suggestions.commitable_code_suggestions:
suggestion['improved_code'] = "" # we need 'existing_code' to locate the code in the PR
else:
suggestion['existing_code'] = ""
suggestion = self._truncate_if_needed(suggestion)
if not get_settings().pr_code_suggestions.commitable_code_suggestions:
one_sentence_summary_list.append(suggestion['one_sentence_summary'])
one_sentence_summary_list.append(suggestion['one_sentence_summary'])
suggestion_list.append(suggestion)
else:
get_logger().info(
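Note: after this change the validation and deduplication checks run unconditionally rather than only in the non-committable mode. A self-contained sketch of the resulting filtering flow (function name and logging are illustrative; the `_truncate_if_needed` step is omitted):

```python
import logging

logger = logging.getLogger(__name__)

def filter_suggestions(suggestions: list[dict]) -> list[dict]:
    """Drop invalid or duplicate suggestions; blank out no-op 'improved' snippets."""
    kept = []
    seen_summaries = []
    for i, s in enumerate(suggestions):
        # Every suggestion must carry the fields the summarized flow relies on.
        if not s or any(k not in s for k in ("one_sentence_summary", "label", "relevant_file")):
            logger.debug("Skipping suggestion %d, because it is invalid: %s", i + 1, s)
            continue
        # Deduplicate on the one-sentence summary.
        if s["one_sentence_summary"] in seen_summaries:
            logger.debug("Skipping suggestion %d, because it is a duplicate: %s", i + 1, s)
            continue
        # Drop the purely stylistic "use const instead of let" pattern.
        content = s.get("suggestion_content", "")
        if "const" in content and "instead" in content and "let" in content:
            logger.debug("Skipping suggestion %d, 'const instead of let': %s", i + 1, s)
            continue
        # If the "improved" snippet equals the existing one, the snippet adds nothing.
        if s.get("existing_code") and s["existing_code"] == s.get("improved_code"):
            s["existing_code"] = ""
        seen_summaries.append(s["one_sentence_summary"])
        kept.append(s)
    return kept
```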
@@ -278,7 +281,10 @@ def push_inline_code_suggestions(self, data):
if new_code_snippet:
new_code_snippet = self.dedent_code(relevant_file, relevant_lines_start, new_code_snippet)

body = f"**Suggestion:** {content} [{label}]\n```suggestion\n" + new_code_snippet + "\n```"
if d.get('score'):
body = f"**Suggestion:** {content} [{label}, importance: {d.get('score')}]\n```suggestion\n" + new_code_snippet + "\n```"
else:
body = f"**Suggestion:** {content} [{label}]\n```suggestion\n" + new_code_snippet + "\n```"
code_suggestions.append({'body': body, 'relevant_file': relevant_file,
'relevant_lines_start': relevant_lines_start,
'relevant_lines_end': relevant_lines_end})
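Note: the inline comment body now embeds the reflection score when one is present. A minimal sketch of that formatting step in isolation (the surrounding provider plumbing is omitted, and the helper name is illustrative):

```python
from typing import Optional

def build_suggestion_body(content: str, label: str, new_code_snippet: str,
                          score: Optional[int] = None) -> str:
    """Format a GitHub 'suggestion' comment, appending the importance score if available."""
    if score:
        header = f"**Suggestion:** {content} [{label}, importance: {score}]"
    else:
        header = f"**Suggestion:** {content} [{label}]"
    return header + "\n```suggestion\n" + new_code_snippet + "\n```"
```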
@@ -327,7 +333,8 @@ async def _prepare_prediction_extended(self, model: str) -> dict:
self.patches_diff_list = get_pr_multi_diffs(self.git_provider, self.token_handler, model,
max_calls=get_settings().pr_code_suggestions.max_number_of_calls)
if self.patches_diff_list:
get_logger().debug(f"PR diff", artifact=self.patches_diff_list)
get_logger().info(f"Number of PR chunk calls: {len(self.patches_diff_list)}")
get_logger().debug(f"PR diff:", artifact=self.patches_diff_list)

# parallelize calls to AI:
if get_settings().pr_code_suggestions.parallel_calls:
@@ -340,24 +347,24 @@ async def _prepare_prediction_extended(self, model: str) -> dict:
prediction = await self._get_prediction(model, patches_diff)
prediction_list.append(prediction)


data = {"code_suggestions": []}
for i, predictions in enumerate(prediction_list):
for j, predictions in enumerate(prediction_list): # each call adds an element to the list
if "code_suggestions" in predictions:
score_threshold = max(1,get_settings().pr_code_suggestions.suggestions_score_threshold)
for prediction in predictions["code_suggestions"]:
score_threshold = max(1, get_settings().pr_code_suggestions.suggestions_score_threshold)
for i, prediction in enumerate(predictions["code_suggestions"]):
try:
if get_settings().pr_code_suggestions.self_reflect_on_suggestions:
score = int(prediction["score"])
if score >= score_threshold:
data["code_suggestions"].append(prediction)
else:
get_logger().info(f"Removing suggestions {i}, because score is {score}, and score_threshold is {score_threshold}",
artifact=prediction)
get_logger().info(
f"Removing suggestions {i} from call {j}, because score is {score}, and score_threshold is {score_threshold}",
artifact=prediction)
else:
get_logger().error(f"Error getting PR diff, no code suggestions found in call {i + 1}")
data["code_suggestions"].append(prediction)
except Exception as e:
get_logger().error(f"Error getting PR diff, error: {e}")
get_logger().error(f"Error getting PR diff for suggestion {i} in call {j}, error: {e}")
self.data = data
else:
get_logger().error(f"Error getting PR diff")
@@ -547,8 +554,7 @@ async def self_reflect_on_suggestions(self, suggestion_list: List, patches_diff:
variables = {'suggestion_list': suggestion_list,
'suggestion_str': suggestion_str,
"diff": patches_diff,
'num_code_suggestions': len(suggestion_list),
'commitable_code_suggestions_mode': get_settings().pr_code_suggestions.commitable_code_suggestions,}
'num_code_suggestions': len(suggestion_list)}
model = get_settings().config.model
environment = Environment(undefined=StrictUndefined)
system_prompt_reflect = environment.from_string(get_settings().pr_code_suggestions_reflect_prompt.system).render(
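Note: the reflection prompt is rendered with Jinja2's `StrictUndefined`, so removing `commitable_code_suggestions_mode` from both the template and the `variables` dict has to happen together; a template that still referenced the removed variable would raise at render time rather than silently rendering empty text. A small sketch of that behavior:

```python
from jinja2 import Environment, StrictUndefined

environment = Environment(undefined=StrictUndefined)
template = environment.from_string("num_code_suggestions: {{ num_code_suggestions }}")

# Succeeds: every variable the template references is supplied.
print(template.render(num_code_suggestions=3))

# Would raise jinja2.exceptions.UndefinedError, because the template
# references a variable missing from the render context:
# environment.from_string("{{ commitable_code_suggestions_mode }}").render()
```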