3 changes: 1 addition & 2 deletions bugbug/tools/code_review/__init__.py
@@ -15,7 +15,7 @@
"""

# Agent
from bugbug.tools.code_review.agent import TARGET_SOFTWARE, CodeReviewTool
from bugbug.tools.code_review.agent import CodeReviewTool

# Databases
from bugbug.tools.code_review.database import (
@@ -54,7 +54,6 @@
__all__ = [
# Agent
"CodeReviewTool",
"TARGET_SOFTWARE",
# Databases
"EvaluationAction",
"ReviewCommentsDB",
121 changes: 72 additions & 49 deletions bugbug/tools/code_review/agent.py
@@ -9,14 +9,16 @@
import os
from datetime import datetime
from logging import getLogger
from typing import Iterable, Literal, Optional
from typing import Iterable, Optional

from langchain.agents import create_agent
from langchain.agents.structured_output import ProviderStrategy
from langchain.chat_models import BaseChatModel
from langchain.messages import HumanMessage
from langchain_classic.chains import LLMChain
from langchain_classic.prompts import PromptTemplate
from langgraph.errors import GraphRecursionError
from pydantic import BaseModel, Field
from unidiff import PatchSet

from bugbug.code_search.function_search import FunctionSearch
@@ -29,19 +31,17 @@
)
from bugbug.tools.code_review.prompts import (
DEFAULT_REJECTED_EXAMPLES,
OUTPUT_FORMAT_JSON,
OUTPUT_FORMAT_TEXT,
FIRST_MESSAGE_TEMPLATE,
PROMPT_TEMPLATE_FILTERING_ANALYSIS,
PROMPT_TEMPLATE_REVIEW,
PROMPT_TEMPLATE_SUMMARIZATION,
STATIC_COMMENT_EXAMPLES,
SYSTEM_PROMPT_TEMPLATE,
TEMPLATE_COMMENT_EXAMPLE,
TEMPLATE_PATCH_FROM_HUNK,
)
from bugbug.tools.code_review.utils import (
format_patch_set,
generate_processed_output,
parse_model_output,
)
from bugbug.tools.core.data_types import InlineComment
from bugbug.tools.core.exceptions import LargeDiffError, ModelResultError
@@ -50,8 +50,27 @@

logger = getLogger(__name__)

# Global variable for target software
TARGET_SOFTWARE: str | None = None

class GeneratedReviewComment(BaseModel):
"""A review comment generated by the code review agent."""

file: str = Field(description="The path to the file the comment applies to.")
code_line: int = Field(description="The line number that the comment refers to.")
comment: str = Field(description="The review comment.")
explanation: str = Field(
description="A brief rationale for the comment, including how confident you are and why."
)
order: int = Field(
description="An integer representing the priority of the comment, with 1 being the highest confidence/importance."
)


class AgentResponse(BaseModel):
"""The response from the code review agent."""

comments: list[GeneratedReviewComment] = Field(
description="A list of generated review comments."
)


class CodeReviewTool(GenerativeModelTool):
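Note: the two Pydantic models above are the contract for the agent's structured output. A minimal sketch of validating a payload against them; the payload itself is hypothetical, not from the PR:

from bugbug.tools.code_review.agent import AgentResponse

# Hypothetical provider output shaped like the schema above.
raw = {
    "comments": [
        {
            "file": "dom/base/Document.cpp",
            "code_line": 128,
            "comment": "The pointer is dereferenced before the null check.",
            "explanation": "High confidence: the guard below is unreachable.",
            "order": 1,
        }
    ]
}

response = AgentResponse.model_validate(raw)  # pydantic v2 checks types and required fields
assert response.comments[0].order == 1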
@@ -60,16 +79,18 @@ class CodeReviewTool(GenerativeModelTool):
def __init__(
self,
llm: BaseChatModel,
summarization_llm: BaseChatModel,
filtering_llm: BaseChatModel,
function_search: Optional[FunctionSearch] = None,
review_comments_db: Optional["ReviewCommentsDB"] = None,
show_patch_example: bool = False,
verbose: bool = True,
suggestions_feedback_db: Optional["SuggestionsFeedbackDB"] = None,
target_software: Optional[str] = None,
target_software: str = "Mozilla Firefox",
) -> None:
super().__init__()

self.target_software = target_software or TARGET_SOFTWARE
self.target_software = target_software

self._tokenizer = get_tokenizer(
llm.model_name if hasattr(llm, "model_name") else ""
@@ -87,28 +108,22 @@ def __init__(
"----------------------------------------------------"
)

experience_scope = (
f"the {self.target_software} source code"
if self.target_software
else "a software project"
)

self.summarization_chain = LLMChain(
prompt=PromptTemplate.from_template(
PROMPT_TEMPLATE_SUMMARIZATION,
partial_variables={"experience_scope": experience_scope},
partial_variables={
"experience_scope": f"the {self.target_software} source code"
},
),
llm=llm,
llm=summarization_llm,
verbose=verbose,
)
self.filtering_chain = LLMChain(
prompt=PromptTemplate.from_template(
PROMPT_TEMPLATE_FILTERING_ANALYSIS,
partial_variables={
"target_code_consistency": self.target_software or "rest of the"
},
partial_variables={"target_code_consistency": self.target_software},
),
llm=llm,
llm=filtering_llm,
verbose=verbose,
)

@@ -119,7 +134,10 @@ def __init__(
self.agent = create_agent(
llm,
tools,
system_prompt=f"You are an expert reviewer for {experience_scope}, with experience on source code reviews.",
system_prompt=SYSTEM_PROMPT_TEMPLATE.format(
target_software=self.target_software,
),
response_format=ProviderStrategy(AgentResponse),
)

self.review_comments_db = review_comments_db
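Note: with response_format=ProviderStrategy(AgentResponse), the provider enforces the schema natively, so the agent's final state carries a parsed AgentResponse instead of free text. A sketch of consuming it, assuming the LangChain v1 agent API used above; first_message is illustrative:

result = self.agent.invoke({"messages": [HumanMessage(content=first_message)]})
structured: AgentResponse = result["structured_response"]  # already validated
for c in sorted(structured.comments, key=lambda c: c.order):
    print(f"{c.file}:{c.code_line} -> {c.comment}")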
@@ -130,50 +148,53 @@ def __init__(

self.suggestions_feedback_db = suggestions_feedback_db

@staticmethod
def create(
llm=None, summarization_llm=None, filtering_llm=None, **kwargs
) -> "CodeReviewTool":
from bugbug.tools.core.llms import create_anthropic_llm

return CodeReviewTool(
llm=llm
or create_anthropic_llm(
model_name="claude-opus-4-5-20251101",
max_tokens=40_000,
temperature=None,
thinking={"type": "enabled", "budget_tokens": 10_000},
),
summarization_llm=summarization_llm or create_anthropic_llm(),
filtering_llm=filtering_llm or create_anthropic_llm(),
**kwargs,
)
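
Note: a hypothetical call site for the new factory; Anthropic credentials are assumed to be configured and `patch` is an existing Patch instance:

from bugbug.tools.code_review.agent import CodeReviewTool
from bugbug.tools.core.exceptions import LargeDiffError

tool = CodeReviewTool.create()  # Opus agent plus default Anthropic summarization/filtering LLMs
try:
    comments = tool.run(patch)  # may return [] when nothing survives filtering
except LargeDiffError:
    comments = None  # run() rejects diffs above its 21,000-token guard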

def count_tokens(self, text):
return len(self._tokenizer.encode(text))

def generate_initial_prompt(
self, patch: Patch, output_format: Literal["JSON", "TEXT"] = "JSON"
) -> str:
def generate_initial_prompt(self, patch: Patch) -> str:
formatted_patch = format_patch_set(patch.patch_set)

output_summarization = self.summarization_chain.invoke(
{
"patch": formatted_patch,
"bug_title": patch.bug_title,
"patch_title": patch.patch_title,
"patch_description": patch.patch_description,
},
return_only_outputs=True,
)["text"]

if self.verbose:
GenerativeModelTool._print_answer(output_summarization)

if output_format == "JSON":
output_instructions = OUTPUT_FORMAT_JSON
elif output_format == "TEXT":
output_instructions = OUTPUT_FORMAT_TEXT
else:
raise ValueError(
f"Unsupported output format: {output_format}, choose JSON or TEXT"
)

created_before = patch.date_created if self.is_experiment_env else None
return PROMPT_TEMPLATE_REVIEW.format(
return FIRST_MESSAGE_TEMPLATE.format(
patch=formatted_patch,
patch_summarization=output_summarization,
comment_examples=self._get_comment_examples(patch, created_before),
approved_examples=self._get_generated_examples(patch, created_before),
target_code_consistency=self.target_software or "rest of the",
output_instructions=output_instructions,
bug_title=patch.bug_title,
patch_title=patch.patch_title,
patch_url=patch.patch_url,
target_software=self.target_software,
)
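
Note: generate_initial_prompt() now only fills named fields; the old JSON/TEXT output_instructions branch is gone because the schema is enforced via response_format. A sketch of what a compatible template could look like, assuming only the placeholder names from the .format() call above; the wording is hypothetical, and the real FIRST_MESSAGE_TEMPLATE lives in prompts.py:

FIRST_MESSAGE_TEMPLATE_SKETCH = (
    "Review this {target_software} patch: {patch_title} ({patch_url})\n"
    "Bug: {bug_title}\n\n"
    "Summary of the patch:\n{patch_summarization}\n\n"
    "Diff:\n{patch}\n\n"
    "Example comments:\n{comment_examples}\n\n"
    "Previously approved comments:\n{approved_examples}\n"
)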

def _generate_suggestions(self, patch: Patch):
def _generate_suggestions(self, patch: Patch) -> list[GeneratedReviewComment]:
try:
for chunk in self.agent.stream(
{
Expand All @@ -189,15 +210,13 @@ def _generate_suggestions(self, patch: Patch):
except GraphRecursionError as e:
raise ModelResultError("The model could not complete the review") from e

return result["messages"][-1].content
return result["structured_response"].comments
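
Note: run() then serializes these models back to text for the filtering chain, as shown in the invoke call below. A sketch of that round-trip with a hypothetical comment:

from bugbug.tools.code_review.agent import GeneratedReviewComment

c = GeneratedReviewComment(
    file="js/src/jit/Ion.cpp",  # hypothetical values throughout
    code_line=7,
    comment="Consider reserving capacity before this loop.",
    explanation="Medium confidence: repeated appends may reallocate.",
    order=2,
)
comments_text = str([c.model_dump()])  # what filtering_chain.invoke receives as "comments"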

def run(self, patch: Patch) -> list[InlineComment] | None:
if self.count_tokens(patch.raw_diff) > 21000:
raise LargeDiffError("The diff is too large")

output = self._generate_suggestions(patch)

unfiltered_suggestions = parse_model_output(output)
unfiltered_suggestions = self._generate_suggestions(patch)
if not unfiltered_suggestions:
logger.info("No suggestions were generated")
return []
Expand All @@ -210,7 +229,9 @@ def run(self, patch: Patch) -> list[InlineComment] | None:

raw_output = self.filtering_chain.invoke(
{
"comments": output,
"comments": str(
[comment.model_dump() for comment in unfiltered_suggestions]
),
"rejected_examples": rejected_examples,
},
return_only_outputs=True,
@@ -300,7 +321,9 @@ def generate_formatted_patch_from_raw_hunk(raw_hunk, filename):
for num, example in enumerate(comment_examples)
)

def get_similar_rejected_comments(self, suggestions) -> Iterable[str]:
def get_similar_rejected_comments(
self, suggestions: list[GeneratedReviewComment]
) -> Iterable[str]:
if not self.suggestions_feedback_db:
raise Exception("Suggestions feedback database is not available")

@@ -310,7 +333,7 @@ def get_similar_rejected_comments(self, suggestions) -> Iterable[str]:
for suggestion in suggestions:
similar_rejected_suggestions = (
self.suggestions_feedback_db.find_similar_rejected_suggestions(
suggestion["comment"],
suggestion.comment,
limit=num_examples_per_suggestion,
excluded_ids=seen_ids,
)
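
Note: a knock-on effect of the structured output, visible in this hunk: suggestions are GeneratedReviewComment models rather than parsed-JSON dicts, so field lookups move from subscripting to attribute access, and misspelled fields fail at validation time instead of deep inside this loop:

# old: suggestion["comment"]   (dict produced by parse_model_output)
# new: suggestion.comment      (typed field on GeneratedReviewComment)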