Refactor logging #708

Merged 5 commits on Feb 25, 2024
15 changes: 10 additions & 5 deletions pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -100,6 +100,7 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
TryAgain: If there is an attribute error during OpenAI inference.
"""
try:
resp, finish_reason = None, None
deployment_id = self.deployment_id
if self.azure:
model = 'azure/' + model
@@ -113,6 +114,8 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
}
if self.aws_bedrock_client:
kwargs["aws_bedrock_client"] = self.aws_bedrock_client

get_logger().debug("Prompts", messages={"system": system, "user": user})
response = await acompletion(**kwargs)
except (APIError, Timeout, TryAgain) as e:
get_logger().error("Error during OpenAI inference: ", e)
@@ -125,9 +128,11 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
raise TryAgain from e
if response is None or len(response["choices"]) == 0:
raise TryAgain
resp = response["choices"][0]['message']['content']
finish_reason = response["choices"][0]["finish_reason"]
usage = response.get("usage")
get_logger().info("AI response", response=resp, messages=messages, finish_reason=finish_reason,
model=model, usage=usage)
else:
resp = response["choices"][0]['message']['content']
finish_reason = response["choices"][0]["finish_reason"]
# usage = response.get("usage")
get_logger().debug(f"\nAI response:\n{resp}")
get_logger().debug("full_response", response=response)

return resp, finish_reason
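
Reviewer note: the switch from f-string interpolation to keyword arguments matters mainly for the JSON sink, since loguru carries keyword arguments in the record's "extra" dict, keeping the payload machine-readable instead of flattening it into the message text. A minimal sketch of that behavior (illustrative only, not part of this PR):

import sys

from loguru import logger

logger.remove()
logger.add(sys.stdout, serialize=True)  # JSON lines, as in LoggingFormat.JSON

# Keyword arguments travel in record["extra"] rather than the message text.
logger.debug("Prompts", messages={"system": "...", "user": "..."})
# emits: {"text": "Prompts\n", "record": {..., "extra": {"messages": {...}}}}
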
38 changes: 30 additions & 8 deletions pr_agent/algo/pr_processing.py
@@ -50,25 +50,43 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s
PATCH_EXTRA_LINES = get_settings().config.patch_extra_lines

try:
diff_files = git_provider.get_diff_files()
diff_files_original = git_provider.get_diff_files()
except RateLimitExceededException as e:
get_logger().error(f"Rate limit exceeded for git provider API. original message {e}")
raise

diff_files = filter_ignored(diff_files)
diff_files = filter_ignored(diff_files_original)
if diff_files != diff_files_original:
try:
get_logger().info(f"Filtered out {len(diff_files_original) - len(diff_files)} files")
new_names = set([a.filename for a in diff_files])
orig_names = set([a.filename for a in diff_files_original])
get_logger().info(f"Filtered out files: {orig_names - new_names}")
except Exception as e:
pass


# get pr languages
pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)
if pr_languages:
try:
get_logger().info(f"PR main language: {pr_languages[0]['language']}")
except Exception as e:
pass

# generate a standard diff string, with patch extension
patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff(
pr_languages, token_handler, add_line_numbers_to_hunks, patch_extra_lines=PATCH_EXTRA_LINES)

# if we are under the limit, return the full diff
if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model):
get_logger().info(f"Tokens: {total_tokens}, total tokens under limit: {get_max_tokens(model)}, "
f"returning full diff.")
return "\n".join(patches_extended)

# if we are over the limit, start pruning
get_logger().info(f"Tokens: {total_tokens}, total tokens over limit: {get_max_tokens(model)}, "
f"pruning diff.")
patches_compressed, modified_file_names, deleted_file_names, added_file_names = \
pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks)

@@ -82,6 +100,11 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s
if deleted_file_names:
deleted_list_str = DELETED_FILES_ + "\n".join(deleted_file_names)
final_diff = final_diff + "\n\n" + deleted_list_str
try:
get_logger().debug(f"After pruning, added_list_str: {added_list_str}, modified_list_str: {modified_list_str}, "
f"deleted_list_str: {deleted_list_str}")
except Exception as e:
pass
return final_diff


@@ -225,14 +248,13 @@ async def retry_with_fallback_models(f: Callable, model_type: ModelType = ModelT
# try each (model, deployment_id) pair until one is successful, otherwise raise exception
for i, (model, deployment_id) in enumerate(zip(all_models, all_deployments)):
try:
if get_settings().config.verbosity_level >= 2:
get_logger().debug(
f"Generating prediction with {model}"
f"{(' from deployment ' + deployment_id) if deployment_id else ''}"
)
get_logger().debug(
f"Generating prediction with {model}"
f"{(' from deployment ' + deployment_id) if deployment_id else ''}"
)
get_settings().set("openai.deployment_id", deployment_id)
return await f(model)
except Exception as e:
except:
get_logger().warning(
f"Failed to generate prediction with {model}"
f"{(' from deployment ' + deployment_id) if deployment_id else ''}: "
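
Reviewer note: with the verbosity gate removed, every fallback attempt is now logged unconditionally. A condensed, hypothetical sketch of the fallback pattern above (imports taken from this repo; the helper name is illustrative):

from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger

async def first_successful(f, all_models, all_deployments):
    # try each (model, deployment_id) pair until one succeeds
    for model, deployment_id in zip(all_models, all_deployments):
        try:
            get_logger().debug(f"Generating prediction with {model}")
            get_settings().set("openai.deployment_id", deployment_id)
            return await f(model)
        except Exception:
            get_logger().warning(f"Failed to generate prediction with {model}")
    raise RuntimeError("all fallback models failed")
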
2 changes: 1 addition & 1 deletion pr_agent/algo/utils.py
@@ -433,7 +433,7 @@ def get_user_labels(current_labels: List[str] = None):
continue
user_labels.append(label)
if user_labels:
get_logger().info(f"Keeping user labels: {user_labels}")
get_logger().debug(f"Keeping user labels: {user_labels}")
except Exception as e:
get_logger().exception(f"Failed to get user labels: {e}")
return current_labels
3 changes: 2 additions & 1 deletion pr_agent/cli.py
@@ -6,7 +6,8 @@
from pr_agent.config_loader import get_settings
from pr_agent.log import setup_logger

setup_logger()
log_level = os.environ.get("LOG_LEVEL", "INFO")
setup_logger(log_level)



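
Reviewer note: CLI verbosity is now driven by the environment instead of a hardcoded default; a sketch of the intended effect (invocation details assumed):

import os

from pr_agent.log import setup_logger

# e.g. set LOG_LEVEL=DEBUG in the shell before running the CLI to surface
# the debug-level logs added throughout this PR
setup_logger(os.environ.get("LOG_LEVEL", "INFO"))
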
9 changes: 7 additions & 2 deletions pr_agent/git_providers/git_provider.py
@@ -57,9 +57,12 @@ def get_pr_description(self, *, full: bool = True) -> str:
return description

def get_user_description(self) -> str:
if hasattr(self, 'user_description') and not (self.user_description is None):
return self.user_description

description = (self.get_pr_description_full() or "").strip()
description_lowercase = description.lower()
get_logger().info(f"Existing description:\n{description_lowercase}")
get_logger().debug(f"Existing description", description=description_lowercase)

# if the existing description wasn't generated by the pr-agent, just return it as-is
if not self._is_generated_by_pr_agent(description_lowercase):
@@ -93,7 +96,9 @@ def get_user_description(self) -> str:
if original_user_description.lower().startswith(user_description_header):
original_user_description = original_user_description[len(user_description_header):].strip()

get_logger().info(f"Extracted user description from existing description:\n{original_user_description}")
get_logger().info(f"Extracted user description from existing description",
description=original_user_description)
self.user_description = original_user_description
return original_user_description

def _possible_headers(self):
2 changes: 1 addition & 1 deletion pr_agent/git_providers/utils.py
@@ -38,7 +38,7 @@ def apply_repo_settings(pr_url):
section_dict[key] = value
get_settings().unset(section)
get_settings().set(section, section_dict, merge=False)
get_logger().info(f"Applying repo settings for section {section}, contents: {contents}")
get_logger().info(f"Applying repo settings:\n{new_settings.as_dict()}")
except Exception as e:
get_logger().exception("Failed to apply repo settings", e)
finally:
5 changes: 3 additions & 2 deletions pr_agent/log/__init__.py
@@ -1,5 +1,6 @@
import json
import logging
import os
import sys
from enum import Enum

@@ -20,7 +21,7 @@ def setup_logger(level: str = "INFO", fmt: LoggingFormat = LoggingFormat.CONSOLE
if type(level) is not int:
level = logging.INFO

if fmt == LoggingFormat.JSON:
if fmt == LoggingFormat.JSON and os.getenv("LOG_SANE", "0").lower() == "0": # better debugging github_app
logger.remove(None)
logger.add(
sys.stdout,
@@ -29,7 +30,7 @@ def setup_logger(level: str = "INFO", fmt: LoggingFormat = LoggingFormat.CONSOLE
colorize=False,
serialize=True,
)
elif fmt == LoggingFormat.CONSOLE:
elif fmt == LoggingFormat.CONSOLE: # does not print the 'extra' fields
logger.remove(None)
logger.add(sys.stdout, level=level, colorize=True)

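
Reviewer note: a small sketch of the resulting control flow as I read the hunk above (names from pr_agent.log; behavior inferred from the shown lines only):

import os

from pr_agent.log import LoggingFormat, get_logger, setup_logger

os.environ["LOG_SANE"] = "1"  # any value other than "0" skips the JSON sink
setup_logger("DEBUG", fmt=LoggingFormat.JSON)
# With LOG_SANE set, neither branch above reconfigures the sink, so loguru's
# default handler stays active: human-readable output despite fmt=JSON.
get_logger().debug("hello")
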
6 changes: 3 additions & 3 deletions pr_agent/servers/github_app.py
@@ -208,15 +208,15 @@ async def handle_request(body: Dict[str, Any], event: str):

# handle comments on PRs
if action == 'created':
get_logger().debug(f'Request body:\n{body}')
get_logger().debug(f'Request body', body=body)
await handle_comments_on_pr(body, event, sender, action, log_context, agent)
# handle new PRs
elif event == 'pull_request' and action != 'synchronize':
get_logger().debug(f'Request body:\n{body}')
get_logger().debug(f'Request body', body=body)
await handle_new_pr_opened(body, event, sender, action, log_context, agent)
# handle pull_request event with synchronize action - "push trigger" for new commits
elif event == 'pull_request' and action == 'synchronize':
get_logger().debug(f'Request body:\n{body}')
get_logger().debug(f'Request body', body=body)
await handle_push_trigger_for_new_commits(body, event, sender, action, log_context, agent)
else:
get_logger().info(f"event {event=} action {action=} does not require any handling")
1 change: 1 addition & 0 deletions pr_agent/settings/configuration.toml
@@ -115,6 +115,7 @@ excluded_checks_list=["lint"] # list of checks to exclude, for example: ["check1
persistent_comment=true
enable_help_text=true

[pr_help] # /help #

[pr_config] # /config #

86 changes: 39 additions & 47 deletions pr_agent/tools/pr_code_suggestions.py
@@ -72,14 +72,16 @@ def __init__(self, pr_url: str, cli_mode=False, args: list = None,
async def run(self):
try:
get_logger().info('Generating code suggestions for PR...')
relevant_configs = {'pr_code_suggestions': dict(get_settings().pr_code_suggestions),
'config': dict(get_settings().config)}
get_logger().debug("Relevant configs", configs=relevant_configs)

if get_settings().config.publish_output:
if self.git_provider.is_supported("gfm_markdown"):
self.progress_response = self.git_provider.publish_comment(self.progress)
else:
self.git_provider.publish_comment("Preparing suggestions...", is_temporary=True)

get_logger().info('Preparing PR code suggestions...')
if not self.is_extended:
await retry_with_fallback_models(self._prepare_prediction, ModelType.TURBO)
data = self._prepare_pr_code_suggestions()
@@ -97,13 +99,12 @@ async def run(self):
data['code_suggestions'] = await self.rank_suggestions(data['code_suggestions'])

if get_settings().config.publish_output:
get_logger().info('Pushing PR code suggestions...')
self.git_provider.remove_initial_comment()
if get_settings().pr_code_suggestions.summarize and self.git_provider.is_supported("gfm_markdown"):
get_logger().info('Pushing summarize code suggestions...')

# generate summarized suggestions
pr_body = self.generate_summarized_suggestions(data)
get_logger().debug(f"PR output", suggestions=pr_body)

# add usage guide
if get_settings().pr_code_suggestions.enable_help_text:
@@ -117,7 +118,6 @@ async def run(self):
self.git_provider.publish_comment(pr_body)

else:
get_logger().info('Pushing inline code suggestions...')
self.push_inline_code_suggestions(data)
if self.progress_response:
self.progress_response.delete()
@@ -127,31 +127,28 @@
self.progress_response.delete()

async def _prepare_prediction(self, model: str):
get_logger().info('Getting PR diff...')
patches_diff = get_pr_diff(self.git_provider,
self.patches_diff = get_pr_diff(self.git_provider,
self.token_handler,
model,
add_line_numbers_to_hunks=True,
disable_extra_lines=True)

get_logger().info('Getting AI prediction...')
self.prediction = await self._get_prediction(model, patches_diff)
if self.patches_diff:
get_logger().debug(f"PR diff", diff=self.patches_diff)
self.prediction = await self._get_prediction(model, self.patches_diff)
else:
get_logger().error(f"Error getting PR diff")
self.prediction = None

async def _get_prediction(self, model: str, patches_diff: str):
variables = copy.deepcopy(self.vars)
variables["diff"] = patches_diff # update diff
environment = Environment(undefined=StrictUndefined)
system_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.system).render(variables)
user_prompt = environment.from_string(get_settings().pr_code_suggestions_prompt.user).render(variables)
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"\nSystem prompt:\n{system_prompt}")
get_logger().info(f"\nUser prompt:\n{user_prompt}")

response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
system=system_prompt, user=user_prompt)

if get_settings().config.verbosity_level >= 2:
get_logger().info(f"\nAI response:\n{response}")

return response

def _prepare_pr_code_suggestions(self) -> Dict:
@@ -185,8 +182,6 @@ def push_inline_code_suggestions(self, data):

for d in data['code_suggestions']:
try:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"suggestion: {d}")
relevant_file = d['relevant_file'].strip()
relevant_lines_start = int(d['relevant_lines_start']) # absolute position
relevant_lines_end = int(d['relevant_lines_end'])
@@ -202,8 +197,7 @@ def push_inline_code_suggestions(self, data):
'relevant_lines_start': relevant_lines_start,
'relevant_lines_end': relevant_lines_end})
except Exception:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Could not parse suggestion: {d}")
get_logger().info(f"Could not parse suggestion: {d}")

is_successful = self.git_provider.publish_code_suggestions(code_suggestions)
if not is_successful:
@@ -229,8 +223,7 @@ def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet):
if delta_spaces > 0:
new_code_snippet = textwrap.indent(new_code_snippet, delta_spaces * " ").rstrip('\n')
except Exception as e:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Could not dedent code snippet for file {relevant_file}, error: {e}")
get_logger().error(f"Could not dedent code snippet for file {relevant_file}, error: {e}")

return new_code_snippet

@@ -245,32 +238,33 @@ def _get_is_extended(self, args: list[str]) -> bool:
return False

async def _prepare_prediction_extended(self, model: str) -> dict:
get_logger().info('Getting PR diff...')
patches_diff_list = get_pr_multi_diffs(self.git_provider, self.token_handler, model,
self.patches_diff_list = get_pr_multi_diffs(self.git_provider, self.token_handler, model,
max_calls=get_settings().pr_code_suggestions.max_number_of_calls)
if self.patches_diff_list:
get_logger().debug(f"PR diff", diff=self.patches_diff_list)

# parallelize calls to AI:
if get_settings().pr_code_suggestions.parallel_calls:
get_logger().info('Getting multi AI predictions in parallel...')
prediction_list = await asyncio.gather(*[self._get_prediction(model, patches_diff) for patches_diff in patches_diff_list])
self.prediction_list = prediction_list
else:
get_logger().info('Getting multi AI predictions...')
prediction_list = []
for i, patches_diff in enumerate(patches_diff_list):
get_logger().info(f"Processing chunk {i + 1} of {len(patches_diff_list)}")
prediction = await self._get_prediction(model, patches_diff)
prediction_list.append(prediction)

data = {}
for prediction in prediction_list:
self.prediction = prediction
data_per_chunk = self._prepare_pr_code_suggestions()
if "code_suggestions" in data:
data["code_suggestions"].extend(data_per_chunk["code_suggestions"])
# parallelize calls to AI:
if get_settings().pr_code_suggestions.parallel_calls:
prediction_list = await asyncio.gather(*[self._get_prediction(model, patches_diff) for patches_diff in self.patches_diff_list])
self.prediction_list = prediction_list
else:
data.update(data_per_chunk)
self.data = data
prediction_list = []
for i, patches_diff in enumerate(self.patches_diff_list):
prediction = await self._get_prediction(model, patches_diff)
prediction_list.append(prediction)

data = {}
for prediction in prediction_list:
self.prediction = prediction
data_per_chunk = self._prepare_pr_code_suggestions()
if "code_suggestions" in data:
data["code_suggestions"].extend(data_per_chunk["code_suggestions"])
else:
data.update(data_per_chunk)
self.data = data
else:
get_logger().error(f"Error getting PR diff")
self.data = data = None
return data

async def rank_suggestions(self, data: List) -> List:
@@ -305,9 +299,7 @@ async def rank_suggestions(self, data: List) -> List:
system_prompt = environment.from_string(get_settings().pr_sort_code_suggestions_prompt.system).render(
variables)
user_prompt = environment.from_string(get_settings().pr_sort_code_suggestions_prompt.user).render(variables)
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"\nSystem prompt:\n{system_prompt}")
get_logger().info(f"\nUser prompt:\n{user_prompt}")

response, finish_reason = await self.ai_handler.chat_completion(model=model, system=system_prompt,
user=user_prompt)

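
Reviewer note: the extended path now caches the chunked diffs on self and, when parallel_calls is enabled, fans the per-chunk predictions out concurrently. A reduced sketch of that fan-out (function shape is illustrative; _get_prediction is the coroutine from the class above):

import asyncio

async def predict_chunks(get_prediction, model: str, patches_diff_list: list[str]) -> list:
    # one chat-completion call per diff chunk, awaited together
    return await asyncio.gather(
        *[get_prediction(model, patches_diff) for patches_diff in patches_diff_list]
    )
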