Skip to content

Commit

Permalink
Merge pull request #1020 from Codium-ai/tr/fallbacks
Browse files Browse the repository at this point in the history
Tr/fallbacks
  • Loading branch information
mrT23 authored Jul 4, 2024
2 parents f7a6e93 + e30c70d commit 1f5c3a4
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 12 deletions.
41 changes: 32 additions & 9 deletions pr_agent/algo/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,25 +511,28 @@ def _fix_key_value(key: str, value: str):
return key, value


def load_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict:
def load_yaml(response_text: str, keys_fix_yaml: List[str] = [], first_key="", last_key="") -> dict:
response_text = response_text.removeprefix('```yaml').rstrip('`')
try:
data = yaml.safe_load(response_text)
except Exception as e:
get_logger().error(f"Failed to parse AI prediction: {e}")
data = try_fix_yaml(response_text, keys_fix_yaml=keys_fix_yaml)
data = try_fix_yaml(response_text, keys_fix_yaml=keys_fix_yaml, first_key=first_key, last_key=last_key)
return data


def try_fix_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict:
def try_fix_yaml(response_text: str,
keys_fix_yaml: List[str] = [],
first_key="",
last_key="",) -> dict:
response_text_lines = response_text.split('\n')

keys = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', 'improved code:']
keys = keys + keys_fix_yaml
keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', 'improved code:']
keys_yaml = keys_yaml + keys_fix_yaml
# first fallback - try to convert 'relevant line: ...' to relevant line: |-\n ...'
response_text_lines_copy = response_text_lines.copy()
for i in range(0, len(response_text_lines_copy)):
for key in keys:
for key in keys_yaml:
if key in response_text_lines_copy[i] and not '|-' in response_text_lines_copy[i]:
response_text_lines_copy[i] = response_text_lines_copy[i].replace(f'{key}',
f'{key} |-\n ')
Expand All @@ -540,14 +543,14 @@ def try_fix_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict:
except:
get_logger().info(f"Failed to parse AI prediction after adding |-\n")

# second fallback - try to extract only range from first ```yaml to ````
# second fallback - try to extract only range from first ```yaml to ```
snippet_pattern = r'```(yaml)?[\s\S]*?```'
snippet = re.search(snippet_pattern, '\n'.join(response_text_lines_copy))
if snippet:
snippet_text = snippet.group()
try:
data = yaml.safe_load(snippet_text.removeprefix('```yaml').rstrip('`'))
get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet")
get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet with second fallback")
return data
except:
pass
Expand All @@ -562,7 +565,27 @@ def try_fix_yaml(response_text: str, keys_fix_yaml: List[str] = []) -> dict:
except:
pass

# fourth fallback - try to remove last lines
# forth fallback - try to extract yaml snippet by 'first_key' and 'last_key'
# note that 'last_key' can be in practice a key that is not the last key in the yaml snippet.
# it just needs to be some inner key, so we can look for newlines after it
if first_key and last_key:
index_start = response_text.find(f"\n{first_key}:")
if index_start == -1:
index_start = response_text.find(f"{first_key}:")
index_last_code = response_text.rfind(f"{last_key}:")
index_end = response_text.find("\n\n", index_last_code) # look for newlines after last_key
if index_end == -1:
index_end = len(response_text)
response_text_copy = response_text[index_start:index_end].strip().strip('```yaml').strip('`').strip()
try:
data = yaml.safe_load(response_text_copy)
get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet")
return data
except:
pass


# fifth fallback - try to remove last lines
data = {}
for i in range(1, len(response_text_lines)):
response_text_lines_tmp = '\n'.join(response_text_lines[:-i])
Expand Down
3 changes: 2 additions & 1 deletion pr_agent/tools/pr_code_suggestions.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,8 @@ def _truncate_if_needed(suggestion):

def _prepare_pr_code_suggestions(self, predictions: str) -> Dict:
data = load_yaml(predictions.strip(),
keys_fix_yaml=["relevant_file", "suggestion_content", "existing_code", "improved_code"])
keys_fix_yaml=["relevant_file", "suggestion_content", "existing_code", "improved_code"],
first_key="code_suggestions",last_key="label")
if isinstance(data, list):
data = {'code_suggestions': data}

Expand Down
10 changes: 8 additions & 2 deletions pr_agent/tools/pr_reviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,12 @@ def _prepare_pr_review(self) -> str:
Prepare the PR review by processing the AI prediction and generating a markdown-formatted text that summarizes
the feedback.
"""
first_key = 'review'
last_key = 'security_concerns'
data = load_yaml(self.prediction.strip(),
keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:",
"relevant_file:", "relevant_line:", "suggestion:"])
"relevant_file:", "relevant_line:", "suggestion:"],
first_key=first_key, last_key=last_key)
github_action_output(data, 'review')

# move data['review'] 'key_issues_to_review' key to the end of the dictionary
Expand Down Expand Up @@ -258,9 +261,12 @@ def _publish_inline_code_comments(self) -> None:
if get_settings().pr_reviewer.num_code_suggestions == 0:
return

first_key = 'review'
last_key = 'security_concerns'
data = load_yaml(self.prediction.strip(),
keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:",
"relevant_file:", "relevant_line:", "suggestion:"])
"relevant_file:", "relevant_line:", "suggestion:"],
first_key=first_key, last_key=last_key)
comments: List[str] = []
for suggestion in data.get('code_feedback', []):
relevant_file = suggestion.get('relevant_file', '').strip()
Expand Down
46 changes: 46 additions & 0 deletions tests/unittest/test_try_fix_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,49 @@ def test_remove_last_line(self):
def test_empty_yaml_fixed(self):
review_text = ""
assert try_fix_yaml(review_text) is None


# The function extracts YAML snippet
def test_no_initial_yaml(self):
review_text = '''\
I suggest the following:
code_suggestions:
- relevant_file: |
src/index.ts
label: |
best practice
- relevant_file: |
src/index2.ts
label: |
enhancment
```
We can further improve the code by using the `const` keyword instead of `var` in the `src/index.ts` file.
'''
expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancment'}]}

assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output

def test_with_initial_yaml(self):
review_text = '''\
I suggest the following:
```
code_suggestions:
- relevant_file: |
src/index.ts
label: |
best practice
- relevant_file: |
src/index2.ts
label: |
enhancment
```
We can further improve the code by using the `const` keyword instead of `var` in the `src/index.ts` file.
'''
expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancment'}]}
assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output

0 comments on commit 1f5c3a4

Please sign in to comment.