Add end-to-end tests for GitHub, GitLab, and Bitbucket apps; added seed #1063

Merged · 7 commits · Jul 28, 2024
2 changes: 1 addition & 1 deletion .github/workflows/build-and-test.yaml
@@ -36,6 +36,6 @@ jobs:
      - id: test
        name: Test dev docker
        run: |
-         docker run --rm codiumai/pr-agent:test pytest -v
+         docker run --rm codiumai/pr-agent:test pytest -v tests/unittest


46 changes: 46 additions & 0 deletions .github/workflows/e2e_tests.yaml
@@ -0,0 +1,46 @@
name: PR-Agent E2E tests

on:
  workflow_dispatch:
#  schedule:
#    - cron: '0 0 * * *' # This cron expression runs the workflow every night at midnight UTC

jobs:
  pr_agent_job:
    runs-on: ubuntu-latest
    name: PR-Agent E2E GitHub App Test
    steps:
      - name: Checkout repository
        uses: actions/checkout@v2

      - name: Setup Docker Buildx
        uses: docker/setup-buildx-action@v2

      - id: build
        name: Build dev docker
        uses: docker/build-push-action@v2
        with:
          context: .
          file: ./docker/Dockerfile
          push: false
          load: true
          tags: codiumai/pr-agent:test
          cache-from: type=gha,scope=dev
          cache-to: type=gha,mode=max,scope=dev
          target: test

      - id: test1
        name: E2E test github app
        run: |
          docker run -e GITHUB.USER_TOKEN=${{ secrets.TOKEN_GITHUB }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_github_app.py

      - id: test2
        name: E2E gitlab webhook
        run: |
          docker run -e gitlab.PERSONAL_ACCESS_TOKEN=${{ secrets.TOKEN_GITLAB }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_gitlab_webhook.py

      - id: test3
        name: E2E bitbucket app
        run: |
          docker run -e BITBUCKET.USERNAME=${{ secrets.BITBUCKET_USERNAME }} -e BITBUCKET.PASSWORD=${{ secrets.BITBUCKET_PASSWORD }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_bitbucket_app.py
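
For local debugging outside Docker, here is a minimal sketch of running one of these suites directly (assumptions: run from the repo root with the project's requirements installed; pytest.main is the standard in-process pytest entry point, and the dotted variable name simply mirrors the -e flags above; the token value is a placeholder):

import os
import pytest

# The e2e suites read credentials from the environment, mirroring the
# `docker run -e ...` flags in the workflow above (placeholder token).
os.environ["GITHUB.USER_TOKEN"] = "<your-github-token>"

# Run a single suite verbosely; a zero exit code means it passed.
exit_code = pytest.main(["-v", "tests/e2e_tests/test_github_app.py"])
raise SystemExit(exit_code)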
7 changes: 7 additions & 0 deletions pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -129,6 +129,13 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
            "force_timeout": get_settings().config.ai_timeout,
            "api_base": self.api_base,
        }
+       seed = get_settings().config.get("seed", -1)
+       if temperature > 0 and seed >= 0:
+           raise ValueError(f"Seed ({seed}) is not supported with temperature ({temperature}) > 0")
+       elif seed >= 0:
+           get_logger().info(f"Using fixed seed of {seed}")
+           kwargs["seed"] = seed

        if self.repetition_penalty:
            kwargs["repetition_penalty"] = self.repetition_penalty
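
For illustration, a minimal sketch of the new guard in isolation, where a plain dict stands in for get_settings().config (names here are illustrative, not part of the PR):

# Standalone sketch of the seed/temperature guard added above.
def apply_seed(kwargs: dict, settings: dict, temperature: float) -> None:
    seed = settings.get("seed", -1)  # -1 (the default) means "no fixed seed"
    if temperature > 0 and seed >= 0:
        # A fixed seed only yields reproducible output at temperature 0,
        # so the combination is rejected outright.
        raise ValueError(f"Seed ({seed}) is not supported with temperature ({temperature}) > 0")
    elif seed >= 0:
        kwargs["seed"] = seed

kwargs = {}
apply_seed(kwargs, {"seed": 42}, temperature=0.0)
assert kwargs == {"seed": 42}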
7 changes: 7 additions & 0 deletions pr_agent/settings/configuration.toml
@@ -1,16 +1,20 @@
[config]
+# models
model="gpt-4-turbo-2024-04-09"
model_turbo="gpt-4o"
fallback_models=["gpt-4-0125-preview"]
+# CLI
git_provider="github"
publish_output=true
publish_output_progress=true
verbosity_level=0 # 0,1,2
use_extra_bad_extensions=false
+# Configurations
use_wiki_settings_file=true
use_repo_settings_file=true
use_global_settings_file=true
ai_timeout=120 # 2minutes
+# token limits
max_description_tokens = 500
max_commits_tokens = 500
max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
@@ -22,6 +26,9 @@ ai_disclaimer="" # Pro feature, full text for the AI disclaimer
output_relevant_configurations=false
large_patch_policy = "clip" # "clip", "skip"
is_auto_command=false
+# seed
+seed=-1 # set a positive value to fix the seed (and ensure temperature=0)
+temperature=0.2

[pr_reviewer] # /review #
# enable/disable features
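
Since every tool now reads config.temperature (see the per-tool diffs below), reproducible runs come from pinning both knobs together. A minimal sketch using the settings API that appears elsewhere in this PR; the exact key casing is an assumption based on the CONFIG.git_provider call in e2e_utils.py:

from pr_agent.config_loader import get_settings

# Opt in to deterministic output, mirroring the new TOML defaults above
# (seed=-1 disables seeding; temperature defaults to 0.2).
get_settings().set("CONFIG.temperature", 0)  # the seed guard requires temperature == 0
get_settings().set("CONFIG.seed", 42)        # any non-negative value fixes the seed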
4 changes: 2 additions & 2 deletions pr_agent/tools/pr_add_docs.py
@@ -89,8 +89,8 @@ async def _get_prediction(self, model: str):
        if get_settings().config.verbosity_level >= 2:
            get_logger().info(f"\nSystem prompt:\n{system_prompt}")
            get_logger().info(f"\nUser prompt:\n{user_prompt}")
-       response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
-                                                                       system=system_prompt, user=user_prompt)
+       response, finish_reason = await self.ai_handler.chat_completion(
+           model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)

        return response

4 changes: 2 additions & 2 deletions pr_agent/tools/pr_code_suggestions.py
@@ -304,8 +304,8 @@ async def _get_prediction(self, model: str, patches_diff: str) -> dict:
        environment = Environment(undefined=StrictUndefined)
        system_prompt = environment.from_string(self.pr_code_suggestions_prompt_system).render(variables)
        user_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.user).render(variables)
-       response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
-                                                                       system=system_prompt, user=user_prompt)
+       response, finish_reason = await self.ai_handler.chat_completion(
+           model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)

        # load suggestions from the AI response
        data = self._prepare_pr_code_suggestions(response)
2 changes: 1 addition & 1 deletion pr_agent/tools/pr_description.py
@@ -325,7 +325,7 @@ async def _get_prediction(self, model: str, patches_diff: str, prompt="pr_descri

        response, finish_reason = await self.ai_handler.chat_completion(
            model=model,
-           temperature=0.2,
+           temperature=get_settings().config.temperature,
            system=system_prompt,
            user=user_prompt
        )
2 changes: 1 addition & 1 deletion pr_agent/tools/pr_generate_labels.py
@@ -142,7 +142,7 @@ async def _get_prediction(self, model: str) -> str:

        response, finish_reason = await self.ai_handler.chat_completion(
            model=model,
-           temperature=0.2,
+           temperature=get_settings().config.temperature,
            system=system_prompt,
            user=user_prompt
        )
4 changes: 2 additions & 2 deletions pr_agent/tools/pr_information_from_user.py
@@ -66,8 +66,8 @@ async def _get_prediction(self, model: str):
        if get_settings().config.verbosity_level >= 2:
            get_logger().info(f"\nSystem prompt:\n{system_prompt}")
            get_logger().info(f"\nUser prompt:\n{user_prompt}")
-       response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
-                                                                       system=system_prompt, user=user_prompt)
+       response, finish_reason = await self.ai_handler.chat_completion(
+           model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)
        return response

    def _prepare_pr_answer(self) -> str:
4 changes: 2 additions & 2 deletions pr_agent/tools/pr_line_questions.py
@@ -102,6 +102,6 @@ async def _get_prediction(self, model: str):
            print(f"\nSystem prompt:\n{system_prompt}")
            print(f"\nUser prompt:\n{user_prompt}")

-       response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
-                                                                       system=system_prompt, user=user_prompt)
+       response, finish_reason = await self.ai_handler.chat_completion(
+           model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)
        return response
10 changes: 5 additions & 5 deletions pr_agent/tools/pr_questions.py
@@ -108,12 +108,12 @@ async def _get_prediction(self, model: str):
        user_prompt = environment.from_string(get_settings().pr_questions_prompt.user).render(variables)
        if 'img_path' in variables:
            img_path = self.vars['img_path']
-           response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
-                                                                           system=system_prompt, user=user_prompt,
-                                                                           img_path=img_path)
+           response, finish_reason = await (self.ai_handler.chat_completion
+                                            (model=model, temperature=get_settings().config.temperature,
+                                             system=system_prompt, user=user_prompt, img_path=img_path))
        else:
-           response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
-                                                                           system=system_prompt, user=user_prompt)
+           response, finish_reason = await self.ai_handler.chat_completion(
+               model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)
        return response

    def _prepare_pr_answer(self) -> str:
2 changes: 1 addition & 1 deletion pr_agent/tools/pr_reviewer.py
@@ -180,7 +180,7 @@ async def _get_prediction(self, model: str) -> str:

        response, finish_reason = await self.ai_handler.chat_completion(
            model=model,
-           temperature=0.2,
+           temperature=get_settings().config.temperature,
            system=system_prompt,
            user=user_prompt
        )
4 changes: 2 additions & 2 deletions pr_agent/tools/pr_update_changelog.py
@@ -103,8 +103,8 @@ async def _get_prediction(self, model: str):
        environment = Environment(undefined=StrictUndefined)
        system_prompt = environment.from_string(get_settings().pr_update_changelog_prompt.system).render(variables)
        user_prompt = environment.from_string(get_settings().pr_update_changelog_prompt.user).render(variables)
-       response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
-                                                                       system=system_prompt, user=user_prompt)
+       response, finish_reason = await self.ai_handler.chat_completion(
+           model=model, system=system_prompt, user=user_prompt, temperature=get_settings().config.temperature)

        return response

35 changes: 35 additions & 0 deletions tests/e2e_tests/e2e_utils.py
@@ -0,0 +1,35 @@
FILE_PATH = "pr_agent/cli_pip.py"

PR_HEADER_START_WITH = '### **User description**\nupdate cli_pip.py\n\n\n___\n\n### **PR Type**'
REVIEW_START_WITH = '## PR Reviewer Guide 🔍\n\n<table>\n<tr><td>⏱️&nbsp;<strong>Estimated effort to review</strong>:'
IMPROVE_START_WITH_REGEX_PATTERN = r'^## PR Code Suggestions ✨\n\n<!-- [a-z0-9]+ -->\n\n<table><thead><tr><td>Category</td>'

NUM_MINUTES = 5

NEW_FILE_CONTENT = """\
from pr_agent import cli
from pr_agent.config_loader import get_settings


def main():
    # Fill in the following values
    provider = "github"  # GitHub provider
    user_token = "..."  # GitHub user token
    openai_key = "ghs_afsdfasdfsdf"  # Example OpenAI key
    pr_url = "..."  # PR URL, for example 'https://github.com/Codium-ai/pr-agent/pull/809'
    command = "/improve"  # Command to run (e.g. '/review', '/describe', '/improve', '/ask="What is the purpose of this PR?"')

    # Setting the configurations
    get_settings().set("CONFIG.git_provider", provider)
    get_settings().set("openai.key", openai_key)
    get_settings().set("github.user_token", user_token)

    # Run the command. Feedback will appear in GitHub PR comments
    output = cli.run_command(pr_url, command)

    print(output)


if __name__ == '__main__':
    main()
"""

100 changes: 100 additions & 0 deletions tests/e2e_tests/test_bitbucket_app.py
@@ -0,0 +1,100 @@
import hashlib
import os
import re
import time
from datetime import datetime

import jwt
from atlassian.bitbucket import Cloud

import requests
from requests.auth import HTTPBasicAuth

from pr_agent.config_loader import get_settings
from pr_agent.log import setup_logger, get_logger
from tests.e2e_tests.e2e_utils import NEW_FILE_CONTENT, FILE_PATH, PR_HEADER_START_WITH, REVIEW_START_WITH, \
IMPROVE_START_WITH_REGEX_PATTERN, NUM_MINUTES


log_level = os.environ.get("LOG_LEVEL", "INFO")
setup_logger(log_level)
logger = get_logger()

def test_e2e_run_bitbucket_app():
    repo_slug = 'pr-agent-tests'
    project_key = 'codiumai'
    base_branch = "main"  # or any base branch you want
    new_branch = f"bitbucket_app_e2e_test-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"
    get_settings().config.git_provider = "bitbucket"

    try:
        # Add username and password for authentication
        username = get_settings().get("BITBUCKET.USERNAME", None)
        password = get_settings().get("BITBUCKET.PASSWORD", None)
        s = requests.Session()
        s.auth = (username, password)  # Use HTTP Basic Auth
        bitbucket_client = Cloud(session=s)
        repo = bitbucket_client.workspaces.get(workspace=project_key).repositories.get(repo_slug)

        # Create a new branch from the base branch
        logger.info(f"Creating a new branch {new_branch} from {base_branch}")
        source_branch = repo.branches.get(base_branch)
        target_repo = repo.branches.create(new_branch, source_branch.hash)

        # Update the file content
        url = f"https://api.bitbucket.org/2.0/repositories/{project_key}/{repo_slug}/src"
        files = {FILE_PATH: NEW_FILE_CONTENT}
        data = {
            "message": "update cli_pip.py",
            "branch": new_branch,
        }
        requests.request("POST", url, auth=HTTPBasicAuth(username, password), data=data, files=files)

        # Create a pull request
        logger.info(f"Creating a pull request from {new_branch} to {base_branch}")
        pr = repo.pullrequests.create(
            title=f'{new_branch}',
            description="update cli_pip.py",
            source_branch=new_branch,
            destination_branch=base_branch
        )

        # check every 1 minute, for 5 minutes if the PR has all the tool results
        for i in range(NUM_MINUTES):
            logger.info(f"Waiting for the PR to get all the tool results...")
            time.sleep(60)
            comments = list(pr.comments())
            comments_raw = [c.raw for c in comments]
            if len(comments) >= 5:  # header, 3 suggestions, 1 review
                valid_review = False
                for comment_raw in comments_raw:
                    if comment_raw.startswith('## PR Reviewer Guide 🔍'):
                        valid_review = True
                        break
                if valid_review:
                    break
                else:
                    logger.error(f"REVIEW feedback is invalid")
                    raise Exception("REVIEW feedback is invalid")
            else:
                logger.info(f"Waiting for the PR to get all the tool results. {i + 1} minute(s) passed")
        else:
            assert False, f"After {NUM_MINUTES} minutes, the PR did not get all the tool results"

        # cleanup - delete the branch
        pr.decline()
        repo.branches.delete(new_branch)

        # If we reach here, the test is successful
        logger.info(f"Succeeded in running e2e test for Bitbucket app on the PR")
    except Exception as e:
        logger.error(f"Failed to run e2e test for Bitbucket app: {e}")
        # delete the branch
        pr.decline()
        repo.branches.delete(new_branch)
        assert False


if __name__ == '__main__':
    test_e2e_run_bitbucket_app()
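
One detail worth calling out: the polling loop in this test relies on Python's for/else, where the else block runs only if the loop exhausts all iterations without hitting break; that is what converts "ran out of minutes" into a hard failure. A minimal sketch of the idiom:

# for/else in miniature: `else` fires only if the loop never breaks.
def poll(attempts: int, check) -> None:
    for i in range(attempts):
        if check(i):
            break  # success: the else branch is skipped
    else:
        raise TimeoutError(f"no result after {attempts} attempts")

poll(3, lambda i: i == 2)   # returns normally on the third attempt
# poll(3, lambda i: False)  # would raise TimeoutError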