diff --git a/bugbug/tools/code_review.py b/bugbug/tools/code_review.py index a08c515848..31386832d4 100644 --- a/bugbug/tools/code_review.py +++ b/bugbug/tools/code_review.py @@ -48,6 +48,8 @@ @dataclass class InlineComment: + # FIXME: we should drop this class and replace it with a class that + # represents a comment suggestion instead of a Phabricator inline comment. filename: str start_line: int end_line: int @@ -331,8 +333,9 @@ def __init__(self, patch_id) -> None: class Patch(ABC): - def __init__(self, patch_id: str) -> None: - self.patch_id = patch_id + @property + @abstractmethod + def patch_id(self) -> str: ... @property @abstractmethod @@ -368,11 +371,109 @@ def get_old_file(self, file_path: str) -> str: ... +class PhabricatorComment: + def __init__(self, transaction: dict): + comment = transaction["comments"][0] + self.id: int = comment["id"] + # TODO: dates should be datetime objects instead of int + self.date_created: int = comment["dateCreated"] + self.date_modified: int = comment["dateModified"] + self.content: str = comment["content"]["raw"] + self.author_phid: str = transaction["authorPHID"] + + +class PhabricatorGeneralComment(PhabricatorComment): + """Representation of a general comment posted on a Phabricator revision.""" + + +class PhabricatorInlineComment(PhabricatorComment): + """Representation of an inline comment posted on a Phabricator revision.""" + + def __init__(self, transaction: dict): + super().__init__(transaction) + + inline_fields = transaction["fields"] + self.diff_id = inline_fields["diff"]["id"] + self.filename = inline_fields["path"] + self.start_line = inline_fields["line"] + self.line_length = inline_fields["length"] + self.is_reply = inline_fields["replyToCommentPHID"] is not None + self.is_done = inline_fields["isDone"] + + # Unfortunately, we do not have this information for a limitation + # in Phabricator's API. + self.on_removed_code = None + + @property + def end_line(self) -> int: + return self.start_line + self.line_length - 1 + + @property + def is_generated(self) -> bool: + # This includes comments generated by Review Helper, but excludes any + # comments that have been edited by the user. + return ( + "> This comment was generated automatically and has been approved by" + in self.content + ) + + +def phabricator_transaction_to_comment( + transaction: dict, +) -> PhabricatorGeneralComment | PhabricatorInlineComment | None: + if not transaction.get("comments"): + return None + + if transaction["type"] == "inline": + return PhabricatorInlineComment(transaction) + + if transaction["type"] == "comment": + return PhabricatorGeneralComment(transaction) + + return None + + class PhabricatorPatch(Patch): - def __init__(self, patch_id: str) -> None: - super().__init__(patch_id) + def __init__( + self, + diff_id: Optional[str | int] = None, + revision_phid: Optional[str] = None, + revision_id: Optional[int] = None, + ) -> None: + assert diff_id or revision_phid or revision_id, ( + "You must provide at least one of diff_id, revision_phid, or revision_id" + ) + + self._diff_id = diff_id + self._revision_phid = revision_phid + self._revision_id = revision_id + + @property + def diff_id(self) -> int: + if self._diff_id: + return int(self._diff_id) - self.diff_id = int(patch_id) + if self._revision_id or self._revision_phid: + return int(self._revision_metadata["fields"]["diffID"]) + + raise ValueError("Cannot determine diff ID") + + @property + def patch_id(self) -> str: + return str(self.diff_id) + + @property + def revision_phid(self) -> str: + if self._revision_phid: + return self._revision_phid + + if self._revision_id: + return self._revision_metadata["phid"] + + if self._diff_id: + return self._diff_metadata["revisionPHID"] + + raise ValueError("Cannot determine revision PHID") def _get_file(self, file_path: str, is_before_patch: bool) -> str: for changeset in self._changesets: @@ -477,12 +578,19 @@ def base_commit_hash(self) -> str: def date_created(self) -> datetime: return datetime.fromtimestamp(self._diff_metadata["dateCreated"]) + @property + def date_modified(self) -> datetime: + return datetime.fromtimestamp(self._diff_metadata["dateModified"]) + @cached_property def _revision_metadata(self) -> dict: assert phabricator.PHABRICATOR_API is not None + # We pass either the revision PHID or the revision ID, not both. + revision_phid = self.revision_phid if not self._revision_id else None + revision = phabricator.PHABRICATOR_API.load_revision( - rev_phid=self._diff_metadata["revisionPHID"] + rev_phid=revision_phid, rev_id=self._revision_id ) return revision @@ -516,6 +624,190 @@ def bug_title(self) -> str: def patch_title(self) -> str: return self._revision_metadata["fields"]["title"] + @property + def revision_id(self) -> int: + return self._revision_metadata["id"] + + def _get_transactions(self) -> list[dict]: + assert phabricator.PHABRICATOR_API is not None + + transactions = phabricator.PHABRICATOR_API.request( + "transaction.search", + objectIdentifier=self._revision_metadata["phid"], + )["data"] + + return transactions + + def get_comments( + self, + ) -> Iterable[PhabricatorInlineComment | PhabricatorGeneralComment]: + transactions = self._get_transactions() + + for transaction in transactions: + comment = phabricator_transaction_to_comment(transaction) + if comment: + yield comment + + def to_md(self) -> str: + """Generate a comprehensive, LLM-friendly markdown representation of the patch. + + Returns a well-structured markdown document that includes revision metadata, + diff information, stack information, code changes, and comments. + """ + # TODO: print authors' names instead of PHIDs + + date_format = "%Y-%m-%d %H:%M:%S" + md_lines = [] + + revision = self._revision_metadata + md_lines.append(f"# Revision D{revision['id']}: {revision['fields']['title']}") + md_lines.append("") + md_lines.append("") + + md_lines.append("## Basic Information") + md_lines.append("") + md_lines.append(f"- **URI**: {revision['fields']['uri']}") + md_lines.append(f"- **Revision Author**: {revision['fields']['authorPHID']}") + md_lines.append(f"- **Title**: {revision['fields']['title']}") + md_lines.append(f"- **Status**: {revision['fields']['status']['name']}") + md_lines.append(f"- **Created**: {self.date_created.strftime(date_format)}") + md_lines.append(f"- **Modified**: {self.date_modified.strftime(date_format)}") + bug_id = revision["fields"].get("bugzilla.bug-id") or "N/A" + md_lines.append(f"- **Bugzilla Bug**: {bug_id}") + md_lines.append("") + md_lines.append("") + + summary = revision["fields"].get("summary") + if summary: + md_lines.append("## Summary") + md_lines.append("") + md_lines.append(summary) + md_lines.append("") + md_lines.append("") + + test_plan = revision["fields"].get("testPlan") + if test_plan: + md_lines.append("## Test Plan") + md_lines.append("") + md_lines.append(test_plan) + md_lines.append("") + md_lines.append("") + + md_lines.append("## Diff Information") + diff = self._diff_metadata + md_lines.append(f"- **Diff ID**: {diff['id']}") + md_lines.append(f"- **Base Revision**: `{diff['baseRevision']}`") + if revision["fields"]["authorPHID"] != diff["authorPHID"]: + md_lines.append(f"- **Diff Author**: {diff['authorPHID']}") + md_lines.append("") + md_lines.append("") + + stack_graph = revision["fields"].get("stackGraph") + if len(stack_graph) > 1: + md_lines.append("## Stack Information") + md_lines.append("") + md_lines.append("**Dependency Graph**:") + md_lines.append("") + md_lines.append("```mermaid") + md_lines.append("graph TD") + + current_phid = revision["phid"] + patch_map = { + phid: ( + self + if phid == current_phid + else PhabricatorPatch(revision_phid=phid) + ) + for phid in stack_graph.keys() + } + + for phid, dependencies in stack_graph.items(): + from_patch = patch_map[phid] + from_id = f"D{from_patch.revision_id}" + if phid == current_phid: + md_lines.append( + f" {from_id}[{from_patch.patch_title} - CURRENT]" + ) + md_lines.append(f" style {from_id} fill:#105823") + else: + md_lines.append(f" {from_id}[{from_patch.patch_title}]") + + for dep_phid in dependencies: + dep_id = f"D{patch_map[dep_phid].revision_id}" + md_lines.append(f" {from_id} --> {dep_id}") + + md_lines.append("```") + md_lines.append("") + md_lines.append("") + + md_lines.append("## Code Changes") + md_lines.append("") + + try: + md_lines.append("```diff") + md_lines.append(self.raw_diff) + md_lines.append("```") + except Exception: + logger.exception("Error while preparing the diff") + md_lines.append("*Error while preparing the diff*") + + md_lines.append("") + md_lines.append("") + + md_lines.append("## Comments Timeline") + md_lines.append("") + + sorted_comments = sorted( + # Ignore empty comments + (comment for comment in self.get_comments() if comment.content.strip()), + key=lambda c: c.date_created, + ) + for comment in sorted_comments: + date = datetime.fromtimestamp(comment.date_created) + date_str = date.strftime(date_format) + + if isinstance(comment, PhabricatorInlineComment): + line_info = ( + f"Line {comment.start_line}" + if comment.line_length == 1 + else f"Lines {comment.start_line}-{comment.end_line}" + ) + done_status = " [RESOLVED]" if comment.is_done else "" + generated_status = " [AI-GENERATED]" if comment.is_generated else "" + + md_lines.append( + f"**{date_str}** - **Inline Comment** by {comment.author_phid} on `{comment.filename}` " + f"at {line_info}{done_status}{generated_status}" + ) + else: + md_lines.append( + f"**{date_str}** - **General Comment** by {comment.author_phid}" + ) + + final_comment_content = comment.content + divider_index = final_comment_content.find("---") + if divider_index != -1: + # Remove footer notes that usually added by reviewbot + final_comment_content = final_comment_content[:divider_index].strip() + + # Truncate very long comments to avoid overloading the LLM + if len(final_comment_content) > 2000: + final_comment_content = ( + final_comment_content[:2000] + "...\n\n*[Content truncated]*" + ) + + md_lines.append("") + md_lines.append(final_comment_content) + md_lines.append("") + md_lines.append("---") + md_lines.append("") + + if not sorted_comments: + md_lines.append("*No comments*") + md_lines.append("") + + return "\n".join(md_lines) + def create_bug_timeline(comments: list[dict], history: list[dict]) -> list[str]: """Create a unified timeline from bug history and comments.""" @@ -916,57 +1208,32 @@ def get_all_inline_comments( ) continue - transaction_comment = transaction["comments"][0] - comment_content = transaction_comment["content"]["raw"] - is_generated = ( - # This includes comments generated by Review Helper, but - # excludes any comments that have been edited by the user. - "> This comment was generated automatically and has been approved by" - in comment_content - ) + comment = PhabricatorInlineComment(transaction) - # Ignore bot comments, except the ones by Review Helper + # Ignore reviewbot comments, except the ones generated by Review Helper if ( transaction["authorPHID"] == "PHID-USER-cje4weq32o3xyuegalpj" - and not is_generated + and not comment.is_generated ): continue - comment_id = transaction_comment["id"] - date_created = transaction_comment["dateCreated"] - diff_id = transaction["fields"]["diff"]["id"] - filename = transaction["fields"]["path"] - start_line = transaction["fields"]["line"] - end_line = ( - transaction["fields"]["line"] + transaction["fields"]["length"] - 1 - ) - # Unfortunately, we do not have this information for a limitation - # in Phabricator's API. - on_removed_code = None - - # store the last modified date and if the comments have been marked as done - date_modified = transaction_comment["dateModified"] - is_done = transaction["fields"]["isDone"] - - # TODO: we could create an extended dataclass for this - # instead of adding optional fields. - comment = InlineComment( - filename=filename, - start_line=start_line, - end_line=end_line, - content=comment_content, - on_removed_code=on_removed_code, - id=comment_id, - date_created=date_created, - date_modified=date_modified, - is_done=is_done, - is_generated=is_generated, - ) - if not comment_filter(comment): continue - diff_comments[diff_id].append(comment) + diff_comments[comment.diff_id].append( + InlineComment( + filename=comment.filename, + start_line=comment.start_line, + end_line=comment.end_line, + content=comment.content, + on_removed_code=comment.on_removed_code, + id=comment.id, + date_created=comment.date_created, + date_modified=comment.date_modified, + is_done=comment.is_done, + is_generated=comment.is_generated, + ) + ) for diff_id, comments in diff_comments.items(): yield diff_id, comments @@ -974,10 +1241,14 @@ def get_all_inline_comments( class SwarmPatch(Patch): def __init__(self, patch_id: str, auth: dict) -> None: - super().__init__(patch_id) + self._patch_id = patch_id self.auth = auth self.rev_id = int(patch_id) + @property + def patch_id(self) -> str: + return self._patch_id + @cached_property def _revision_metadata(self) -> dict: import swarm diff --git a/mcp/src/bugbug_mcp/server.py b/mcp/src/bugbug_mcp/server.py index 7c85f5c960..a7671f56cf 100644 --- a/mcp/src/bugbug_mcp/server.py +++ b/mcp/src/bugbug_mcp/server.py @@ -227,6 +227,16 @@ def handle_bug_view_resource(bug_id: int) -> str: return Bug.get(bug_id).to_md() +@mcp.resource( + uri="phabricator://revision/D{revision_id}", + name="Phabricator Revision Content", + mime_type="text/markdown", +) +def handle_revision_view_resource(revision_id: int) -> str: + """Retrieve a revision from Phabricator alongside its comments.""" + return PhabricatorPatch(revision_id=revision_id).to_md() + + def main(): phabricator.set_api_key( get_secret("PHABRICATOR_URL"), get_secret("PHABRICATOR_TOKEN")