Skip to content

Commit

Permalink
Merge pull request #1022 from Codium-ai/tr/claude
Browse files Browse the repository at this point in the history
sonnet-3.5
  • Loading branch information
mrT23 authored Jul 4, 2024
2 parents 12973c2 + 20d9d8a commit e35f83b
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 18 deletions.
9 changes: 4 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ CodiumAI PR-Agent aims to help efficiently review and handle pull requests, by p

## News and Updates

### July 4, 2024

Added improved support for claude-sonnet-3.5 model (anthropic, vertex, bedrock), including dedicated prompts.

### June 17, 2024

New option for a self-review checkbox is now available for the `/improve` tool, along with the ability(💎) to enable auto-approve, or demand self-review in addition to human reviewer. See more [here](https://pr-agent-docs.codium.ai/tools/improve/#self-review).
Expand All @@ -60,11 +64,6 @@ New option now available (💎) - **apply suggestions**:



### May 31, 2024

Check out the new [**PR-Agent Code Fine-tuning Benchmark**](https://pr-agent-docs.codium.ai/finetuning_benchmark/)


## Overview
<div style="text-align:left;">

Expand Down
1 change: 1 addition & 0 deletions pr_agent/algo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
'bedrock/anthropic.claude-v2:1': 100000,
'bedrock/anthropic.claude-3-sonnet-20240229-v1:0': 100000,
'bedrock/anthropic.claude-3-haiku-20240307-v1:0': 100000,
'bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0': 100000,
'groq/llama3-8b-8192': 8192,
'groq/llama3-70b-8192': 8192,
'ollama/llama3': 4096,
Expand Down
18 changes: 7 additions & 11 deletions pr_agent/algo/ai_handlers/litellm_ai_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,18 @@ def __init__(self):
Raises a ValueError if the OpenAI key is missing.
"""
self.azure = False
self.aws_bedrock_client = None
self.api_base = None
self.repetition_penalty = None
if get_settings().get("OPENAI.KEY", None):
openai.api_key = get_settings().openai.key
litellm.openai_key = get_settings().openai.key
elif 'OPENAI_API_KEY' not in os.environ:
litellm.api_key = "dummy_key"
if get_settings().get("aws.AWS_ACCESS_KEY_ID"):
assert get_settings().aws.AWS_SECRET_ACCESS_KEY and get_settings().aws.AWS_REGION_NAME, "AWS credentials are incomplete"
os.environ["AWS_ACCESS_KEY_ID"] = get_settings().aws.AWS_ACCESS_KEY_ID
os.environ["AWS_SECRET_ACCESS_KEY"] = get_settings().aws.AWS_SECRET_ACCESS_KEY
os.environ["AWS_REGION_NAME"] = get_settings().aws.AWS_REGION_NAME
if get_settings().get("litellm.use_client"):
litellm_token = get_settings().get("litellm.LITELLM_TOKEN")
assert litellm_token, "LITELLM_TOKEN is required"
Expand Down Expand Up @@ -71,14 +77,6 @@ def __init__(self):
litellm.vertex_location = get_settings().get(
"VERTEXAI.VERTEX_LOCATION", None
)
if get_settings().get("AWS.BEDROCK_REGION", None):
litellm.AmazonAnthropicConfig.max_tokens_to_sample = 2000
litellm.AmazonAnthropicClaude3Config.max_tokens = 2000
self.aws_bedrock_client = boto3.client(
service_name="bedrock-runtime",
region_name=get_settings().aws.bedrock_region,
)

def prepare_logs(self, response, system, user, resp, finish_reason):
response_log = response.dict().copy()
response_log['system'] = system
Expand Down Expand Up @@ -131,8 +129,6 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
"force_timeout": get_settings().config.ai_timeout,
"api_base": self.api_base,
}
if self.aws_bedrock_client:
kwargs["aws_bedrock_client"] = self.aws_bedrock_client
if self.repetition_penalty:
kwargs["repetition_penalty"] = self.repetition_penalty

Expand Down
99 changes: 99 additions & 0 deletions pr_agent/settings/pr_code_suggestions_prompts.toml
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,102 @@ The PR Diff:
Response (should be a valid YAML, and nothing else):
```yaml
"""


[pr_code_suggestions_prompt_claude]
system="""You are PR-Reviewer, a language model that specializes in suggesting ways to improve for a Pull Request (PR) code.
Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff.
The format we will use to present the PR code diff:
======
## file: 'src/file1.py'
@@ ... @@ def func1():
__new hunk__
12 code line1 that remained unchanged in the PR
13 +new hunk code line2 added in the PR
14 code line3 that remained unchanged in the PR
__old hunk__
code line1 that remained unchanged in the PR
-old hunk code line2 that was removed in the PR
code line3 that remained unchanged in the PR
@@ ... @@ def func2():
__new hunk__
...
__old hunk__
...
## file: 'src/file2.py'
...
======
- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed.
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only.
Specific instructions for generating code suggestions:
- Provide up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
- The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers).
- Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.
- Don't suggest to add docstring, type hints, or comments, or to remove unused imports.
- Provide the exact line numbers range (inclusive) for each suggestion. Use the line numbers from the '__new hunk__' sections.
- When quoting variables or names from the code, use backticks (`) instead of single quote (').
- Take into account that you are recieving as an input only a PR code diff. The entire codebase is not available for you as context. Hence, avoid suggestions that might conflict with unseen parts of the codebase, like imports, global variables, etc.
{%- if extra_instructions %}
Extra instructions from the user, that should be taken into account with high priority:
======
{{ extra_instructions }}
======
{%- endif %}
The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions:
=====
class CodeSuggestion(BaseModel):
relevant_file: str = Field(description="the relevant file full path")
language: str = Field(description="the code language of the relevant file")
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise ")
existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Use abbreviations ("...") if needed")
improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant 'existing_code' lines in '__new hunk__' code after applying the suggestion")
one_sentence_summary: str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.")
relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
label: str = Field(description="a single label for the suggestion, to help understand the suggestion type. For example: 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', etc. Other labels are also allowed")
class PRCodeSuggestions(BaseModel):
code_suggestions: List[CodeSuggestion]
=====
Example output:
```yaml
code_suggestions:
- relevant_file: |
src/file1.py
language: |
python
suggestion_content: |
...
existing_code: |
...
improved_code: |
...
one_sentence_summary: |
...
relevant_lines_start: 12
relevant_lines_end: 13
label: |
...
```
Each YAML output MUST be after a newline, indented, with block scalar indicator ('|').
"""
10 changes: 8 additions & 2 deletions pr_agent/tools/pr_code_suggestions.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,15 @@ def __init__(self, pr_url: str, cli_mode=False, args: list = None,
"extra_instructions": get_settings().pr_code_suggestions.extra_instructions,
"commit_messages_str": self.git_provider.get_commit_messages(),
}
if 'claude' in get_settings().config.model:
# prompt for Claude, with minor adjustments
self.pr_code_suggestions_prompt_system = get_settings().pr_code_suggestions_prompt_claude.system
else:
self.pr_code_suggestions_prompt_system = get_settings().pr_code_suggestions_prompt.system

self.token_handler = TokenHandler(self.git_provider.pr,
self.vars,
get_settings().pr_code_suggestions_prompt.system,
self.pr_code_suggestions_prompt_system,
get_settings().pr_code_suggestions_prompt.user)

self.progress = f"## Generating PR code suggestions\n\n"
Expand Down Expand Up @@ -280,7 +286,7 @@ async def _get_prediction(self, model: str, patches_diff: str) -> dict:
variables = copy.deepcopy(self.vars)
variables["diff"] = patches_diff # update diff
environment = Environment(undefined=StrictUndefined)
system_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.system).render(variables)
system_prompt = environment.from_string(self.pr_code_suggestions_prompt_system).render(variables)
user_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.user).render(variables)
response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)
Expand Down

0 comments on commit e35f83b

Please sign in to comment.