From 29721df2ce1c90e59ac63c4e64cc52711552b29e Mon Sep 17 00:00:00 2001 From: Igor Radovanovic <74266147+IgorWounds@users.noreply.github.com> Date: Tue, 23 Apr 2024 23:03:51 +0200 Subject: [PATCH] Summarize Changelog --- .github/release-drafter.yml | 8 +- .github/workflows/draft-release.yml | 13 ++- summarize_chanelog.py | 166 ++++++++++++++++++++++++++++ 3 files changed, 177 insertions(+), 10 deletions(-) create mode 100644 summarize_chanelog.py diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index eab4af1c80a2..4825d814fb63 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -1,6 +1,9 @@ name-template: 'OpenBB Platform v$NEXT_MINOR_VERSION' tag-template: 'v$NEXT_MINOR_VERSION' categories: + - title: 🚨 OpenBB Platform Breaking Changes + labels: + - 'breaking_change' - title: 🦋 OpenBB Platform Enhancements labels: - 'platform' @@ -8,9 +11,6 @@ categories: - title: 🐛 OpenBB Platform Bug Fixes labels: - 'bug' - - title: 🚨 OpenBB Platform Breaking Changes - labels: - - 'breaking_change' - title: 📚 OpenBB Documentation Changes labels: - 'docs' @@ -35,7 +35,7 @@ template: | ## Thank you and welcome to our new contributors 🔥 $CONTRIBUTORS - ## What's new 🎉 + ## Summary 🎉 ## What's changed 🚀 $CHANGES diff --git a/.github/workflows/draft-release.yml b/.github/workflows/draft-release.yml index 51f8ddb7f86c..37367dc5303c 100644 --- a/.github/workflows/draft-release.yml +++ b/.github/workflows/draft-release.yml @@ -1,16 +1,16 @@ name: Release Drafter -on: +on: workflow_dispatch: inputs: release_pr_number: - description: 'Release PR Number' + description: "Release PR Number" required: true - default: '' + default: "" tag: - description: 'Tag for release (manual input)' + description: "Tag for release (manual input)" required: true - default: '' + default: "" jobs: update_release_draft: @@ -36,6 +36,7 @@ jobs: - name: 🧬 Process Changelog run: | python process_changelog.py CHANGELOG.md ${{ github.event.inputs.release_pr_number }} + 
"""Changelog v2 summary generator."""

import logging
import re
import sys
from typing import Dict, List

# Matches one changelog entry of the form
# "- [tag] - <title> @<author> (#<number>)"; the "[tag] - " prefix is optional.
# Group 1 is the title, group 2 the PR number.
_PR_ITEM_PATTERN = re.compile(r"- (?:\[.*?\] - )?(.*?) @.*? \(#(\d+)\)")

# Markdown links and emphasis/code markers stripped from PR bodies before they
# are sent to the summarizer.
_MARKDOWN_NOISE_PATTERN = re.compile(r"\[.*?\]\(.*?\)|[*_`]")


def fetch_pr_details(owner: str, repo: str, pr_number: str, github_token: str) -> dict:
    """Fetch details of a specific PR from GitHub.

    Args:
        owner: Repository owner (user or organization).
        repo: Repository name.
        pr_number: Number of the pull request to fetch.
        github_token: Token sent in the ``Authorization`` header.

    Returns:
        The decoded JSON payload of the pull request, or an empty dict when
        the request fails, the status code is not 200, or the response body
        is not valid JSON. Failures are logged, never raised.
    """
    # Imported lazily (like ``openai`` further down) so the pure parsing and
    # insertion helpers stay importable without third-party packages.
    import requests  # pylint: disable=C0415

    url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}"
    headers = {"Authorization": f"token {github_token}"}
    try:
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            return response.json()
    except (requests.RequestException, ValueError) as e:
        # Timeouts, connection errors and undecodable bodies follow the same
        # "log and return nothing" contract as a non-200 status.
        logging.error("Failed to fetch PR details for PR #%s: %s", pr_number, e)
        return {}

    logging.error(
        "Failed to fetch PR details for PR #%s. Status code: %s",
        pr_number,
        response.status_code,
    )
    return {}


def parse_and_fetch_pr_details(
    markdown_text: str, owner: str, repo: str, github_token: str
) -> Dict[str, List[Dict[str, str]]]:
    """Parse the markdown text and fetch details of PRs mentioned in the text.

    The text is split on level-2 headings (``## ``). For every changelog entry
    found under a heading, the PR is fetched from GitHub and its title and
    whitespace-normalized body are recorded under that heading's name.

    Args:
        markdown_text: Full changelog markdown.
        owner: Repository owner passed to :func:`fetch_pr_details`.
        repo: Repository name passed to :func:`fetch_pr_details`.
        github_token: GitHub token passed to :func:`fetch_pr_details`.

    Returns:
        A mapping from category name to a list of ``{"title", "body"}`` dicts.
        Categories without any successfully fetched PR are omitted.
    """
    # "(?:^|\n)" lets a heading on the very first line be recognized as well;
    # otherwise its name would keep the literal "## " prefix.
    sections = re.split(r"(?:^|\n)## ", markdown_text)
    categories: Dict[str, List[Dict[str, str]]] = {}

    for section in sections:
        heading, separator, items_text = section.partition("\n")
        if not separator:
            # A heading with nothing below it (or leading text without a
            # newline) cannot contain entries.
            continue

        category_name = heading.strip()

        for _, pr_number in _PR_ITEM_PATTERN.findall(items_text.strip()):
            pr_details = fetch_pr_details(owner, repo, pr_number, github_token)
            if not pr_details:
                continue

            try:
                title = pr_details["title"]
                # GitHub returns ``null`` for PRs without a description.
                body = pr_details.get("body") or ""
                pr_info = {
                    "title": title,
                    "body": re.sub(r"\s+", " ", body.strip()).strip(),
                }
            except (KeyError, TypeError, AttributeError) as e:
                logging.error(
                    "Failed to fetch PR details for PR #%s: %s", pr_number, e
                )
                # Skip this PR instead of appending an unbound or stale entry.
                continue

            categories.setdefault(category_name, []).append(pr_info)

    return categories


def insert_summary_into_markdown(
    markdown_text: str, category_name: str, summary: str
) -> str:
    """Insert a summary into the markdown text directly under the specified category name.

    Args:
        markdown_text: Markdown document to update.
        category_name: Heading text (without the leading ``## ``) to look for.
        summary: Text to place under the heading.

    Returns:
        The updated markdown, with the summary separated from the heading and
        from the following content by one blank line each. When the heading is
        not present, an error is logged and the text is returned unchanged.
    """
    marker = f"## {category_name}"
    marker_pos = markdown_text.find(marker)
    if marker_pos == -1:
        logging.error("Category '%s' not found in markdown.", category_name)
        return markdown_text

    # End of the heading line; everything after it is the category content.
    newline_pos = markdown_text.find("\n", marker_pos + len(marker))
    if newline_pos == -1:
        # The heading is the last line of the file: just append the summary.
        return markdown_text + "\n\n" + summary + "\n"

    head = markdown_text[: newline_pos + 1]
    # Drop the existing blank lines so exactly one blank line separates the
    # summary from the content that follows it.
    tail = markdown_text[newline_pos + 1 :].lstrip("\n")
    return head + "\n" + summary + "\n\n" + tail


def summarize_text_with_openai(text: str, openai_api_key: str) -> str:
    """Summarize text using OpenAI's GPT model.

    Args:
        text: Text to summarize.
        openai_api_key: API key used to create the OpenAI client.

    Returns:
        The summary produced by the model, or an empty string when the model
        returns no content.
    """
    from openai import OpenAI  # pylint: disable=C0415

    client = OpenAI(api_key=openai_api_key)
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {
                "role": "system",
                "content": "Summarize the following text in a concise way to describe what happened in the new release. This will be used on top of the changelog to provide a high-level overview of the changes. Make sure it is well-written, concise, structured and that it captures the essence of the text. It should read like a concise story.",  # noqa: E501  # pylint: disable=C0301
            },
            {"role": "user", "content": text},
        ],
    )
    # ``content`` is optional in the API response; honor the ``str`` contract.
    return response.choices[0].message.content or ""


def summarize_changelog_v2(
    github_token: str,
    openai_api_key: str,
    owner: str = "OpenBB-finance",
    repo: str = "OpenBBTerminal",
    changelog_v2: str = "CHANGELOG.md",
) -> None:
    """Summarize the Changelog v2 markdown text with PR details.

    Reads the changelog, collects the PRs listed under each category of
    interest, asks OpenAI for a summary per category and writes the changelog
    back with each summary inserted under its heading.

    Args:
        github_token: GitHub token used to fetch PR details.
        openai_api_key: OpenAI API key used for summarization.
        owner: Repository owner.
        repo: Repository name.
        changelog_v2: Path of the changelog file, updated in place.
    """
    try:
        # The changelog headings contain emoji, so do not depend on the
        # platform's default encoding.
        with open(changelog_v2, encoding="utf-8") as file:
            logging.info("Reading file: %s", changelog_v2)
            data = file.read()
    except OSError as e:
        logging.error("Failed to open or read file: %s", e)
        return

    logging.info("Parsing and fetching PR details...")
    categories = parse_and_fetch_pr_details(data, owner, repo, github_token)

    categories_of_interest = [
        "🚨 OpenBB Platform Breaking Changes",
        "🦋 OpenBB Platform Enhancements",
        "🐛 OpenBB Platform Bug Fixes",
        "📚 OpenBB Documentation Changes",
    ]
    updated_markdown = data

    logging.info("Summarizing text with OpenAI...")
    for category_of_interest in categories_of_interest:
        pull_requests = categories.get(category_of_interest)
        if not pull_requests:
            continue

        aggregated_text = "\n".join(
            f"- {pr['title']}: {_MARKDOWN_NOISE_PATTERN.sub('', pr['body'])}"
            for pr in pull_requests
        )
        summary = summarize_text_with_openai(aggregated_text, openai_api_key)
        if not summary:
            logging.warning(
                "No summary returned for category '%s'.", category_of_interest
            )
            continue

        updated_markdown = insert_summary_into_markdown(
            updated_markdown, category_of_interest, summary
        )

    try:
        with open(changelog_v2, "w", encoding="utf-8") as file:
            logging.info("Writing updated file: %s", changelog_v2)
            file.write(updated_markdown)
    except OSError as e:
        logging.error("Failed to write file: %s", e)


if __name__ == "__main__":
    # Without an explicit level the INFO progress messages are never emitted.
    logging.basicConfig(level=logging.INFO)

    if len(sys.argv) < 3:
        logging.error(
            "Usage: python summarize_changelog.py <github_token> <openai_api_key>"
        )
        sys.exit(1)

    token = sys.argv[1]
    openai_key = sys.argv[2]

    summarize_changelog_v2(github_token=token, openai_api_key=openai_key)