From 218ddfcf3f0dc027779f505e498b1be33b73e26c Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 23 Jan 2025 15:02:13 -0500 Subject: [PATCH 01/70] Initial working release notes generator --- scripts/release_notes_generator.py | 275 +++++++++++++++++++++++++++++ 1 file changed, 275 insertions(+) create mode 100644 scripts/release_notes_generator.py diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py new file mode 100644 index 0000000000..4b8a0f46bb --- /dev/null +++ b/scripts/release_notes_generator.py @@ -0,0 +1,275 @@ +import logging +import os +import re +import subprocess + +import tiktoken +from openai import OpenAI + +client = OpenAI( + api_key=os.environ.get("OPENAI_API_KEY"), +) + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +FIREFOX_VERSION_1 = "FIREFOX_BETA_133_BASE" +FIREFOX_VERSION_2 = "FIREFOX_BETA_134_BASE" +REPO_DIRECTORY = "hg_dir" +OUTPUT_FILE = f"release_notes_{FIREFOX_VERSION_2}.txt" +CHUNK_SIZE = 4000 + + +def run_hg_log(query, repo_dir): + try: + result = subprocess.run( + ["hg", "log", "-r", query], + cwd=repo_dir, + capture_output=True, + text=True, + check=True, + ) + return result.stdout.strip() + except subprocess.CalledProcessError as e: + print(f"Error running hg log: {e}") + return None + + +def get_token_count(text, model="gpt-4o-mini"): + encoding = tiktoken.encoding_for_model(model) + return len(encoding.encode(text)) + + +def split_into_chunks(commit_log, chunk_size, model="gpt-4"): + commit_blocks = commit_log.split("\n\n") + chunks = [] + current_chunk = [] + current_token_count = 0 + + for block in commit_blocks: + block_token_count = get_token_count(block, model=model) + + if current_token_count + block_token_count > chunk_size: + # Add the current chunk to the chunks list and start a new chunk + chunks.append("\n\n".join(current_chunk)) + current_chunk = [] + current_token_count = 0 + + current_chunk.append(block) + current_token_count += block_token_count + + # Add the last chunk if any content remains + if current_chunk: + chunks.append("\n\n".join(current_chunk)) + + return chunks + + +def summarize_with_gpt(input_text, step="summary"): + if step == "summary": + prompt = f"""You are an expert in summarizing commit logs for release notes, skilled at identifying important updates, features, and fixes while ensuring traceability. +I will provide you with a chunk of commit logs. Your task is to: +1. **Review the commit logs carefully.** +2. **Identify and summarize only those commits that are significant for release notes, such as user-facing changes, critical bug fixes, performance improvements, and new features.** +3. **Include the associated bug numbers in parentheses at the end of each summary item, if available.** +4. Use a concise bulleted list format. For each item, begin with a category tag like [Feature], [Fix], [Improvement], [Change], followed by a brief description and the bug number (e.g., bug 123456). + +Here is the chunk of commit logs: +{input_text} + +Output the summary in this format: +- [Category] Description of the change (bug XXXXX). +- [Category] Another important update (bug XXXXX, bug XXXXX). +""" + elif step == "final": + prompt = f"""You are an expert in creating professional, user-friendly release notes. +I will provide you with a combined summary of updates and fixes derived from commit logs. Your task is to: +1. **Review the provided summary carefully and polish it into a cohesive document.** +2. **Group the updates into categories such as Accessibility Improvements, Performance Optimizations, Security and Privacy, etc.** +3. **Ensure that each item in the release notes includes its corresponding bug number(s) in parentheses at the end, as provided in the summaries.** +4. Use simple, professional language suitable for both technical and non-technical audiences, and avoid overly technical jargon. +5. Begin with a short introductory paragraph summarizing the release, including the version number and key highlights. +6. MAKE SURE THESE ARE USER-FACING RELEASE NOTES. Avoid any references to code, functions etc. They should be about Firefox features. + +Here is an example of release notes: + +Accessibility Improvements + + Enhanced accessibility for scrolling events and text fragment navigation (bug 1926214). + Improved handling of anchor jumps to ensure accessibility events are fired correctly (bug 1926198). + Updated DevTools to better support High Contrast Mode, improving visibility of UI elements like charts and declarations (bug 1916391, bug 1926794, bug 1926851, bug 1926852). + +Android-Specific Changes + + Added support for autocorrect in Android's GeckoView backend (bug 1725806). + Removed the Extensions chevron icon from the Homepage Menu as part of the Android Menu Redesign (bug 1925005). + Improved handling of translation prompts, ensuring "Not now" cancels translation (bug 1913602). + Enabled biometric authentication for accessing saved logins on Android (bug 1932575). + +DevTools Enhancements + + Updated DevTools to better handle High Contrast Mode, improving visibility of UI elements like charts and declarations (bug 1916391, bug 1926794, bug 1926851, bug 1926852). + Refactored the Start Performance Analysis button style for better usability (bug 1926878). + Made the "WhyPaused" debugger panel a live region and added paused location information for better accessibility (bug 1843320). + Improved High Contrast Mode support for markup nodes and console borders (bug 1916688, bug 1931502). + +Performance Optimizations + + Optimized layout calculations for fixed-position frames in display ports to improve rendering performance (bug 1927375). + Improved handling of JavaScript IC (Inline Cache) operations to enhance performance (bug 1922981). + Enhanced memory handling for WebAssembly, increasing memory limits and enabling memory64 by default (bug 1931401, bug 1929590). + Optimized garbage collection by avoiding full GC during ongoing CC (bug 1932394). + +Security and Privacy + + Removed the security.external_protocol_requires_permission pref, simplifying external protocol handling (bug 1925479). + Updated CRLite filter channel to use experimental+deltas on Nightly for improved certificate revocation checks (bug 1927598). + Improved clipboard content analysis to handle multiple clipboard items securely (bug 1915351). + Enabled biometric authentication for accessing saved logins on Android (bug 1932575). + +Web Platform and Standards + + Implemented PushManager.supportedContentEncodings for better web push support (bug 1497430). + Added support for ReadableStreamBYOBReader.prototype.read(view, { min }) to align with web standards (bug 1864406). + Improved handling of text fragments and scrolling behavior for better web compatibility (bug 1907808). + Shipped js-string-builtins and improved WebAssembly memory handling (bug 1913964, bug 1932087). + +Localization and Internationalization + + Migrated necko error messages from properties to Fluent for better localization support (bug 1733498). + Updated various localization strings and configurations across Firefox and Mobile (multiple l10n bumps). + Improved handling of city/state in MLSuggest subjects (bug 1932671). + +UI and User Experience + + Updated the URL bar's search mode behavior and layout (bug 1921731, bug 1925532). + Improved tab dragging behavior by moving tabs when hitting 70% of their width (bug 1932425). + Enabled save and close functionality for tab groups (bug 1923652). + Added a restore tab group API to session management (bug 1932670). + Improved URL bar geolocation utilities and Yelp suggestion matching (bug 1932537, bug 1931964). + +Media and WebRTC + + Enabled simulcast for screensharing sources and added tests to ensure compatibility (bug 1692873). + Added AV1 codec support for WebRTC, including negotiation, parameter handling, and tests (bug 1921154). + Improved H264 handling in WebRTC tests and ensured consistent use of fake GMP plugins (bug 1534688). + +Miscellaneous + + Updated Sentry to version 7.16.0 for better error reporting (bug 1927169). + Improved filename sanitization for downloads to enhance security and usability (bug 1914858). + Enabled ScotchBonnet on Nightly for improved UI testing (bug 1916679). + Updated Rust dependencies (zerovec-derive, shlex) (bug 1932319, bug 1932316). + +This changelog focuses on user-visible changes, performance improvements, and security enhancements, providing a high-level overview of the most impactful updates in this Firefox release. + + +Here is the summarized list of updates: +{input_text} + +Output the release notes in this format: +Release Notes for Version {FIREFOX_VERSION_2} + +### Accessibility Improvements +- Enhanced accessibility for scrolling events and text fragment navigation (bug XXXXX). +- Improved handling of anchor jumps to ensure accessibility events are fired correctly (bug XXXXX). + +### Performance Optimizations +- Optimized layout calculations for fixed-position frames (bug XXXXX). +- Enhanced memory handling for WebAssembly, increasing memory limits (bug XXXXX, bug XXXXX). + +### Security and Privacy +- Removed outdated preferences for external protocol handling (bug XXXXX). +- Updated certificate revocation checks for improved security (bug XXXXX). +""" + + try: + response = client.chat.completions.create( + messages=[ + { + "role": "user", + "content": prompt, + } + ], + model="gpt-4o-mini", + temperature=0.1, + ) + return response.choices[0].message.content.strip() + except Exception as e: + logger.error(f"Error while calling OpenAI API: {e}") + return "Error: Unable to generate summary or release notes." + + +def generate_summaries(commit_log): + chunks = split_into_chunks(commit_log, CHUNK_SIZE) + summaries = [summarize_with_gpt(chunk, step="summary") for chunk in chunks] + return summaries + + +def clean_commits(commit_log, keywords): + cleaned_commits = [] + commit_blocks = commit_log.split("\n\n") + + for block in commit_blocks: + if not any( + re.search(rf"\b{keyword}\b", block, re.IGNORECASE) for keyword in keywords + ): + cleaned_commits.append(block) + + return "\n\n".join(cleaned_commits) + + +def generate_release_notes(): + logger.info(f"Generating list of commits for version: {FIREFOX_VERSION_2}") + + logger.info("Finding the branching point commit...") + branching_commit_query = f"ancestor({FIREFOX_VERSION_1}, {FIREFOX_VERSION_2})" + branching_commit_output = run_hg_log(branching_commit_query, REPO_DIRECTORY) + + if not branching_commit_output: + logger.error("Failed to find the branching point commit. Exiting.") + exit(1) + + branching_commit_hash = branching_commit_output.split(":")[1].split()[0] + logger.info(f"Branching point commit: {branching_commit_hash}") + + logger.info("Fetching the list of changes...") + changes_query = ( + f"descendants({branching_commit_hash}) and ancestors({FIREFOX_VERSION_2})" + ) + changes_output = run_hg_log(changes_query, REPO_DIRECTORY) + + if not changes_output: + logger.error("Failed to fetch the list of changes. Exiting.") + exit(1) + + logger.info("Cleaning commit log...") + keywords_to_remove = [ + "Backed out", + "a=testonly", + "a=release", + "DONTBUILD", + "add tests", + "disable test", + ] + cleaned_commits = clean_commits(changes_output, keywords_to_remove) + + # TEMP FOR NOW + cleaned_commits = cleaned_commits[0:20000] + + logger.info("Generating summaries for cleaned commits...") + summaries = generate_summaries(cleaned_commits) + + combined_summary = "\n\n".join(summaries) + + logger.info("Polishing combined summary with GPT...") + final_notes = summarize_with_gpt(combined_summary, step="final") + + with open(OUTPUT_FILE, "w") as file: + file.write(final_notes) + + logger.info(f"Release notes saved to {OUTPUT_FILE}") + + +if __name__ == "__main__": + generate_release_notes() From 51e7f348e46a7f0c517916f695fe62d4568ef707 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 24 Jan 2025 15:54:08 -0500 Subject: [PATCH 02/70] Fixed prompt and list generation --- scripts/release_notes_generator.py | 100 +++++++++++------------------ 1 file changed, 38 insertions(+), 62 deletions(-) diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py index 4b8a0f46bb..1b58549d57 100644 --- a/scripts/release_notes_generator.py +++ b/scripts/release_notes_generator.py @@ -6,6 +6,8 @@ import tiktoken from openai import OpenAI +MODEL = "gpt-4o" + client = OpenAI( api_key=os.environ.get("OPENAI_API_KEY"), ) @@ -13,10 +15,10 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -FIREFOX_VERSION_1 = "FIREFOX_BETA_133_BASE" -FIREFOX_VERSION_2 = "FIREFOX_BETA_134_BASE" +FIREFOX_VERSION_1 = "FIREFOX_BETA_132_BASE" +FIREFOX_VERSION_2 = "FIREFOX_BETA_133_BASE" REPO_DIRECTORY = "hg_dir" -OUTPUT_FILE = f"release_notes_{FIREFOX_VERSION_2}.txt" +OUTPUT_FILE = f"worthy_commits_{FIREFOX_VERSION_2}.txt" CHUNK_SIZE = 4000 @@ -35,7 +37,7 @@ def run_hg_log(query, repo_dir): return None -def get_token_count(text, model="gpt-4o-mini"): +def get_token_count(text, model=MODEL): encoding = tiktoken.encoding_for_model(model) return len(encoding.encode(text)) @@ -65,34 +67,20 @@ def split_into_chunks(commit_log, chunk_size, model="gpt-4"): return chunks -def summarize_with_gpt(input_text, step="summary"): - if step == "summary": - prompt = f"""You are an expert in summarizing commit logs for release notes, skilled at identifying important updates, features, and fixes while ensuring traceability. -I will provide you with a chunk of commit logs. Your task is to: -1. **Review the commit logs carefully.** -2. **Identify and summarize only those commits that are significant for release notes, such as user-facing changes, critical bug fixes, performance improvements, and new features.** -3. **Include the associated bug numbers in parentheses at the end of each summary item, if available.** -4. Use a concise bulleted list format. For each item, begin with a category tag like [Feature], [Fix], [Improvement], [Change], followed by a brief description and the bug number (e.g., bug 123456). +def summarize_with_gpt(input_text): + prompt = f""" +You are an expert in analyzing commit logs. I will provide you with a chunk of commit logs. Your task is to: +1. Identify commits or groups of commits that are relevant for potential release notes. Focus on changes that: + - Are meaningful to **end users**, such as new features or importan changes +2. Exclude: + - Internal refactorings, test-related updates, or minor low-level changes that are not relevant to end users. + - Changes that were made to the codebase that may be relevant to Mozilla engineers, but not to end users. These can include references to random functions, files, etc. + - Highly technical details or jargon in the descriptions that might confuse non-developers. +3. Use simple and user-friendly language for descriptions, particularly for end-user-facing changes. -Here is the chunk of commit logs: -{input_text} - -Output the summary in this format: -- [Category] Description of the change (bug XXXXX). -- [Category] Another important update (bug XXXXX, bug XXXXX). -""" - elif step == "final": - prompt = f"""You are an expert in creating professional, user-friendly release notes. -I will provide you with a combined summary of updates and fixes derived from commit logs. Your task is to: -1. **Review the provided summary carefully and polish it into a cohesive document.** -2. **Group the updates into categories such as Accessibility Improvements, Performance Optimizations, Security and Privacy, etc.** -3. **Ensure that each item in the release notes includes its corresponding bug number(s) in parentheses at the end, as provided in the summaries.** -4. Use simple, professional language suitable for both technical and non-technical audiences, and avoid overly technical jargon. -5. Begin with a short introductory paragraph summarizing the release, including the version number and key highlights. -6. MAKE SURE THESE ARE USER-FACING RELEASE NOTES. Avoid any references to code, functions etc. They should be about Firefox features. - -Here is an example of release notes: +Here is an example of release notes that were generated by another script. Do not follow the format, but use it to understand what kind of changes we want to include: +Firefox Changelog: 133 to 134 Accessibility Improvements Enhanced accessibility for scrolling events and text fragment navigation (bug 1926214). @@ -163,24 +151,12 @@ def summarize_with_gpt(input_text, step="summary"): This changelog focuses on user-visible changes, performance improvements, and security enhancements, providing a high-level overview of the most impactful updates in this Firefox release. - -Here is the summarized list of updates: +Here is the chunk of commit logs you need to focus on: {input_text} -Output the release notes in this format: -Release Notes for Version {FIREFOX_VERSION_2} +Here is the STRICT format I want you to follow. No extra text please: +- [Type of Change] Description of the change (bug XXXXX) -### Accessibility Improvements -- Enhanced accessibility for scrolling events and text fragment navigation (bug XXXXX). -- Improved handling of anchor jumps to ensure accessibility events are fired correctly (bug XXXXX). - -### Performance Optimizations -- Optimized layout calculations for fixed-position frames (bug XXXXX). -- Enhanced memory handling for WebAssembly, increasing memory limits (bug XXXXX, bug XXXXX). - -### Security and Privacy -- Removed outdated preferences for external protocol handling (bug XXXXX). -- Updated certificate revocation checks for improved security (bug XXXXX). """ try: @@ -191,18 +167,18 @@ def summarize_with_gpt(input_text, step="summary"): "content": prompt, } ], - model="gpt-4o-mini", - temperature=0.1, + model=MODEL, + temperature=0.2, ) return response.choices[0].message.content.strip() except Exception as e: logger.error(f"Error while calling OpenAI API: {e}") - return "Error: Unable to generate summary or release notes." + return "Error: Unable to generate summary." def generate_summaries(commit_log): chunks = split_into_chunks(commit_log, CHUNK_SIZE) - summaries = [summarize_with_gpt(chunk, step="summary") for chunk in chunks] + summaries = [summarize_with_gpt(chunk) for chunk in chunks] return summaries @@ -211,15 +187,20 @@ def clean_commits(commit_log, keywords): commit_blocks = commit_log.split("\n\n") for block in commit_blocks: - if not any( - re.search(rf"\b{keyword}\b", block, re.IGNORECASE) for keyword in keywords + if ( + not any( + re.search(rf"\b{keyword}\b", block, re.IGNORECASE) + for keyword in keywords + ) + and re.search(r"Bug \d+", block, re.IGNORECASE) + and not re.search(r"release\+treescript@mozilla\.org", block, re.IGNORECASE) ): cleaned_commits.append(block) return "\n\n".join(cleaned_commits) -def generate_release_notes(): +def generate_worthy_commits(): logger.info(f"Generating list of commits for version: {FIREFOX_VERSION_2}") logger.info("Finding the branching point commit...") @@ -253,23 +234,18 @@ def generate_release_notes(): "disable test", ] cleaned_commits = clean_commits(changes_output, keywords_to_remove) - - # TEMP FOR NOW - cleaned_commits = cleaned_commits[0:20000] + cleaned_commits = cleaned_commits[0:40000] logger.info("Generating summaries for cleaned commits...") summaries = generate_summaries(cleaned_commits) - combined_summary = "\n\n".join(summaries) - - logger.info("Polishing combined summary with GPT...") - final_notes = summarize_with_gpt(combined_summary, step="final") + combined_list = "\n\n".join(summaries) with open(OUTPUT_FILE, "w") as file: - file.write(final_notes) + file.write(combined_list) - logger.info(f"Release notes saved to {OUTPUT_FILE}") + logger.info(f"Worthy commits saved to {OUTPUT_FILE}") if __name__ == "__main__": - generate_release_notes() + generate_worthy_commits() From 6af501bfc1c77a4467acbe10d88f69ddd06bf0d0 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 27 Jan 2025 16:06:17 -0500 Subject: [PATCH 03/70] Fixed prompt and excluded Nightly --- scripts/release_notes_generator.py | 130 +++++++++-------------------- 1 file changed, 40 insertions(+), 90 deletions(-) diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py index 1b58549d57..7168559c96 100644 --- a/scripts/release_notes_generator.py +++ b/scripts/release_notes_generator.py @@ -18,7 +18,7 @@ FIREFOX_VERSION_1 = "FIREFOX_BETA_132_BASE" FIREFOX_VERSION_2 = "FIREFOX_BETA_133_BASE" REPO_DIRECTORY = "hg_dir" -OUTPUT_FILE = f"worthy_commits_{FIREFOX_VERSION_2}.txt" +OUTPUT_FILE = f"version_summary_{FIREFOX_VERSION_2}.txt" CHUNK_SIZE = 4000 @@ -69,94 +69,43 @@ def split_into_chunks(commit_log, chunk_size, model="gpt-4"): def summarize_with_gpt(input_text): prompt = f""" -You are an expert in analyzing commit logs. I will provide you with a chunk of commit logs. Your task is to: -1. Identify commits or groups of commits that are relevant for potential release notes. Focus on changes that: - - Are meaningful to **end users**, such as new features or importan changes -2. Exclude: - - Internal refactorings, test-related updates, or minor low-level changes that are not relevant to end users. - - Changes that were made to the codebase that may be relevant to Mozilla engineers, but not to end users. These can include references to random functions, files, etc. - - Highly technical details or jargon in the descriptions that might confuse non-developers. -3. Use simple and user-friendly language for descriptions, particularly for end-user-facing changes. - -Here is an example of release notes that were generated by another script. Do not follow the format, but use it to understand what kind of changes we want to include: - -Firefox Changelog: 133 to 134 -Accessibility Improvements - - Enhanced accessibility for scrolling events and text fragment navigation (bug 1926214). - Improved handling of anchor jumps to ensure accessibility events are fired correctly (bug 1926198). - Updated DevTools to better support High Contrast Mode, improving visibility of UI elements like charts and declarations (bug 1916391, bug 1926794, bug 1926851, bug 1926852). - -Android-Specific Changes - - Added support for autocorrect in Android's GeckoView backend (bug 1725806). - Removed the Extensions chevron icon from the Homepage Menu as part of the Android Menu Redesign (bug 1925005). - Improved handling of translation prompts, ensuring "Not now" cancels translation (bug 1913602). - Enabled biometric authentication for accessing saved logins on Android (bug 1932575). - -DevTools Enhancements - - Updated DevTools to better handle High Contrast Mode, improving visibility of UI elements like charts and declarations (bug 1916391, bug 1926794, bug 1926851, bug 1926852). - Refactored the Start Performance Analysis button style for better usability (bug 1926878). - Made the "WhyPaused" debugger panel a live region and added paused location information for better accessibility (bug 1843320). - Improved High Contrast Mode support for markup nodes and console borders (bug 1916688, bug 1931502). - -Performance Optimizations - - Optimized layout calculations for fixed-position frames in display ports to improve rendering performance (bug 1927375). - Improved handling of JavaScript IC (Inline Cache) operations to enhance performance (bug 1922981). - Enhanced memory handling for WebAssembly, increasing memory limits and enabling memory64 by default (bug 1931401, bug 1929590). - Optimized garbage collection by avoiding full GC during ongoing CC (bug 1932394). - -Security and Privacy - - Removed the security.external_protocol_requires_permission pref, simplifying external protocol handling (bug 1925479). - Updated CRLite filter channel to use experimental+deltas on Nightly for improved certificate revocation checks (bug 1927598). - Improved clipboard content analysis to handle multiple clipboard items securely (bug 1915351). - Enabled biometric authentication for accessing saved logins on Android (bug 1932575). - -Web Platform and Standards - - Implemented PushManager.supportedContentEncodings for better web push support (bug 1497430). - Added support for ReadableStreamBYOBReader.prototype.read(view, { min }) to align with web standards (bug 1864406). - Improved handling of text fragments and scrolling behavior for better web compatibility (bug 1907808). - Shipped js-string-builtins and improved WebAssembly memory handling (bug 1913964, bug 1932087). - -Localization and Internationalization - - Migrated necko error messages from properties to Fluent for better localization support (bug 1733498). - Updated various localization strings and configurations across Firefox and Mobile (multiple l10n bumps). - Improved handling of city/state in MLSuggest subjects (bug 1932671). - -UI and User Experience - - Updated the URL bar's search mode behavior and layout (bug 1921731, bug 1925532). - Improved tab dragging behavior by moving tabs when hitting 70% of their width (bug 1932425). - Enabled save and close functionality for tab groups (bug 1923652). - Added a restore tab group API to session management (bug 1932670). - Improved URL bar geolocation utilities and Yelp suggestion matching (bug 1932537, bug 1931964). - -Media and WebRTC - - Enabled simulcast for screensharing sources and added tests to ensure compatibility (bug 1692873). - Added AV1 codec support for WebRTC, including negotiation, parameter handling, and tests (bug 1921154). - Improved H264 handling in WebRTC tests and ensured consistent use of fake GMP plugins (bug 1534688). - -Miscellaneous - - Updated Sentry to version 7.16.0 for better error reporting (bug 1927169). - Improved filename sanitization for downloads to enhance security and usability (bug 1914858). - Enabled ScotchBonnet on Nightly for improved UI testing (bug 1916679). - Updated Rust dependencies (zerovec-derive, shlex) (bug 1932319, bug 1932316). - -This changelog focuses on user-visible changes, performance improvements, and security enhancements, providing a high-level overview of the most impactful updates in this Firefox release. - -Here is the chunk of commit logs you need to focus on: -{input_text} - -Here is the STRICT format I want you to follow. No extra text please: -- [Type of Change] Description of the change (bug XXXXX) - +You are an expert in analyzing commit logs. Your task is to analyze a chunk of commit logs and produce a summary in a clear and user-friendly format. Follow these steps: + +1. **Analyze Commit Logs**: + - Identify commits or groups of commits relevant for potential release notes. Focus on changes that: + - Are meaningful to **end users**, such as new features, user-facing improvements, or critical updates. + - Exclude: + - Internal refactorings, test-related updates, or minor low-level changes that are not relevant to end users. + - Highly technical details or jargon that might confuse non-developers. + +2. **Enhance Context**: + - If a commit lacks sufficient information (e.g., vague descriptions or unexplained references to functions), break the process into two steps: + - Step 1: Explain why the commit's description is insufficient for end users (e.g., the function's purpose is unclear or its relevance is ambiguous). + - Step 2: Perform a reasoning step where you hypothesize or research the broader context, including the potential impact on security, performance, or user experience. + - Use your analysis to enhance clarity and add relevant context to the description. + +3. **Output Format**: + - Use simple, non-technical language suitable for release notes. + - Use the following strict format for each relevant commit: + - [Type of Change] Description of the change (bug XXXXX) + - Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc. + +4. **Example Commit Logs**: + - Input: `- [Security] Enforce validateRequestHeaders in HTTP parser (bug 1931456)` + - **Step 1**: Identify insufficient details. "validateRequestHeaders" is unclear without understanding its role in the HTTP parser. + - **Step 2**: Contextual reasoning. This function likely enforces stricter checks on HTTP headers, mitigating potential attack vectors. + - Output: `[Security] Enhanced HTTP request validation by enforcing stricter header checks, reducing the risk of malformed or malicious requests (bug 1931456).` + +5. **Output Strictness**: + - The output must only be the final list, following the specified format. + - Ensure every description is clear, complete, and directly relevant to end users. + +6. **Input**: + Here is the chunk of commit logs you need to focus on: + {input_text} + +7. **Output**: + The output should just be the list. Nothing more and nothing less. """ try: @@ -194,6 +143,7 @@ def clean_commits(commit_log, keywords): ) and re.search(r"Bug \d+", block, re.IGNORECASE) and not re.search(r"release\+treescript@mozilla\.org", block, re.IGNORECASE) + and not re.search(r"nightly", block, re.IGNORECASE) ): cleaned_commits.append(block) @@ -239,7 +189,7 @@ def generate_worthy_commits(): logger.info("Generating summaries for cleaned commits...") summaries = generate_summaries(cleaned_commits) - combined_list = "\n\n".join(summaries) + combined_list = "\n".join(summaries) with open(OUTPUT_FILE, "w") as file: file.write(combined_list) From 98ea3a9e3049c47e3f35f14ba2e0c2858023ef07 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 28 Jan 2025 10:19:26 -0500 Subject: [PATCH 04/70] Added duplicate remover --- scripts/release_notes_generator.py | 33 ++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py index 7168559c96..57a481c134 100644 --- a/scripts/release_notes_generator.py +++ b/scripts/release_notes_generator.py @@ -82,7 +82,7 @@ def summarize_with_gpt(input_text): - If a commit lacks sufficient information (e.g., vague descriptions or unexplained references to functions), break the process into two steps: - Step 1: Explain why the commit's description is insufficient for end users (e.g., the function's purpose is unclear or its relevance is ambiguous). - Step 2: Perform a reasoning step where you hypothesize or research the broader context, including the potential impact on security, performance, or user experience. - - Use your analysis to enhance clarity and add relevant context to the description. + - Use your analysis to enhance clarity and add relevant context to the description. This ensures that whatever you are adding to the list is actually worthy of being in the release notes, rather than you adding it with no understanding of it. 3. **Output Format**: - Use simple, non-technical language suitable for release notes. @@ -117,7 +117,7 @@ def summarize_with_gpt(input_text): } ], model=MODEL, - temperature=0.2, + temperature=0.1, ) return response.choices[0].message.content.strip() except Exception as e: @@ -125,6 +125,32 @@ def summarize_with_gpt(input_text): return "Error: Unable to generate summary." +def remove_duplicates(input_text): + prompt = f"""Given the following list, remove any duplicate entries. That is, if two or more entries talk abou the same change (does not have to be identical wording), remove the less descriptive one. Do not alter anything else. + + Here is the list: + {input_text} + + The output should just be the list with the duplicates removed. Nothing more, nothing less. + """ + + try: + response = client.chat.completions.create( + messages=[ + { + "role": "user", + "content": prompt, + } + ], + model=MODEL, + temperature=0.1, + ) + return response.choices[0].message.content.strip() + except Exception as e: + logger.error(f"Error while calling OpenAI API: {e}") + return "Error: Unable to remove duplicates." + + def generate_summaries(commit_log): chunks = split_into_chunks(commit_log, CHUNK_SIZE) summaries = [summarize_with_gpt(chunk) for chunk in chunks] @@ -191,6 +217,9 @@ def generate_worthy_commits(): combined_list = "\n".join(summaries) + logger.info("Removing duplicates from the list...") + combined_list = remove_duplicates(combined_list) + with open(OUTPUT_FILE, "w") as file: file.write(combined_list) From a8e9880d996fac91fae8f3798f112eccd219036f Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 28 Jan 2025 11:17:02 -0500 Subject: [PATCH 05/70] Added additional filtering --- scripts/release_notes_generator.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py index 57a481c134..9d304df625 100644 --- a/scripts/release_notes_generator.py +++ b/scripts/release_notes_generator.py @@ -131,7 +131,7 @@ def remove_duplicates(input_text): Here is the list: {input_text} - The output should just be the list with the duplicates removed. Nothing more, nothing less. + The output should just be the list with the duplicates removed. Nothing more, nothing less. Do not add any text before or after the list. """ try: @@ -151,6 +151,31 @@ def remove_duplicates(input_text): return "Error: Unable to remove duplicates." +def remove_unworthy_commits(input_text): + prompt = f"""Review the following list of release notes and remove anything that is not worthy or necessary for inclusion in official release notes. Focus on keeping only changes that are meaningful, impactful, and directly relevant to end users, such as new features, significant fixes, performance improvements, accessibility enhancements, or critical security updates. Remove anything minor, overly technical, or irrelevant. + +Here is the list: +{input_text} + +Return the cleaned-up list in the same format. Do not add any text before or after the list.""" + + try: + response = client.chat.completions.create( + messages=[ + { + "role": "user", + "content": prompt, + } + ], + model=MODEL, + temperature=0.1, + ) + return response.choices[0].message.content.strip() + except Exception as e: + logger.error(f"Error while calling OpenAI API: {e}") + return "Error: Unable to remove unworthy commits." + + def generate_summaries(commit_log): chunks = split_into_chunks(commit_log, CHUNK_SIZE) summaries = [summarize_with_gpt(chunk) for chunk in chunks] @@ -220,6 +245,9 @@ def generate_worthy_commits(): logger.info("Removing duplicates from the list...") combined_list = remove_duplicates(combined_list) + logger.info("Removing unworthy commits from the list...") + combined_list = remove_unworthy_commits(combined_list) + with open(OUTPUT_FILE, "w") as file: file.write(combined_list) From afd0d2c6eafba84892274a7d2fdc372719b16a12 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 31 Jan 2025 15:19:56 -0500 Subject: [PATCH 06/70] Fixed prompt to clean --- scripts/release_notes_generator.py | 100 ++++++++++++++++------------- 1 file changed, 55 insertions(+), 45 deletions(-) diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py index 9d304df625..13cde398b9 100644 --- a/scripts/release_notes_generator.py +++ b/scripts/release_notes_generator.py @@ -19,7 +19,7 @@ FIREFOX_VERSION_2 = "FIREFOX_BETA_133_BASE" REPO_DIRECTORY = "hg_dir" OUTPUT_FILE = f"version_summary_{FIREFOX_VERSION_2}.txt" -CHUNK_SIZE = 4000 +CHUNK_SIZE = 5000 def run_hg_log(query, repo_dir): @@ -52,7 +52,6 @@ def split_into_chunks(commit_log, chunk_size, model="gpt-4"): block_token_count = get_token_count(block, model=model) if current_token_count + block_token_count > chunk_size: - # Add the current chunk to the chunks list and start a new chunk chunks.append("\n\n".join(current_chunk)) current_chunk = [] current_token_count = 0 @@ -60,7 +59,6 @@ def split_into_chunks(commit_log, chunk_size, model="gpt-4"): current_chunk.append(block) current_token_count += block_token_count - # Add the last chunk if any content remains if current_chunk: chunks.append("\n\n".join(current_chunk)) @@ -87,7 +85,7 @@ def summarize_with_gpt(input_text): 3. **Output Format**: - Use simple, non-technical language suitable for release notes. - Use the following strict format for each relevant commit: - - [Type of Change] Description of the change (bug XXXXX) + - [Type of Change] Description of the change (bug XXXXX) (reasoning: ) - Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc. 4. **Example Commit Logs**: @@ -152,12 +150,12 @@ def remove_duplicates(input_text): def remove_unworthy_commits(input_text): - prompt = f"""Review the following list of release notes and remove anything that is not worthy or necessary for inclusion in official release notes. Focus on keeping only changes that are meaningful, impactful, and directly relevant to end users, such as new features, significant fixes, performance improvements, accessibility enhancements, or critical security updates. Remove anything minor, overly technical, or irrelevant. + prompt = f"""Review the following list of release notes and remove anything list entry that is not worthy or necessary for inclusion in official release notes. Focus on keeping only changes that are meaningful, impactful, and directly relevant to end users, such as new features, significant fixes, performance improvements, accessibility enhancements, or critical security updates. Remove anything minor, overly technical, or irrelevant. Here is the list: {input_text} -Return the cleaned-up list in the same format. Do not add any text before or after the list.""" +Return the cleaned-up list in the same format. Only remove the list entries you do not deem worthy of being included in the release notes. KEEP THE SAME FORMAT, DO NOT ALTER THE ENTRIES THEMSELVES. Do not add any text before or after the list.""" try: response = client.chat.completions.create( @@ -178,6 +176,8 @@ def remove_unworthy_commits(input_text): def generate_summaries(commit_log): chunks = split_into_chunks(commit_log, CHUNK_SIZE) + print(f"LENGTH OF CHUNKS: {len(chunks)}") + print(f"LENGTH OF FIRST CHUNK: {len(chunks[0])}") summaries = [summarize_with_gpt(chunk) for chunk in chunks] return summaries @@ -196,62 +196,72 @@ def clean_commits(commit_log, keywords): and not re.search(r"release\+treescript@mozilla\.org", block, re.IGNORECASE) and not re.search(r"nightly", block, re.IGNORECASE) ): - cleaned_commits.append(block) + match = re.search(r"summary:\s+(.+)", block) + commit_summary = match.group(1) if match else None + cleaned_commits.append(commit_summary) return "\n\n".join(cleaned_commits) def generate_worthy_commits(): - logger.info(f"Generating list of commits for version: {FIREFOX_VERSION_2}") + # logger.info(f"Generating list of commits for version: {FIREFOX_VERSION_2}") - logger.info("Finding the branching point commit...") - branching_commit_query = f"ancestor({FIREFOX_VERSION_1}, {FIREFOX_VERSION_2})" - branching_commit_output = run_hg_log(branching_commit_query, REPO_DIRECTORY) + # logger.info("Finding the branching point commit...") + # branching_commit_query = f"ancestor({FIREFOX_VERSION_1}, {FIREFOX_VERSION_2})" + # branching_commit_output = run_hg_log(branching_commit_query, REPO_DIRECTORY) - if not branching_commit_output: - logger.error("Failed to find the branching point commit. Exiting.") - exit(1) + # if not branching_commit_output: + # logger.error("Failed to find the branching point commit. Exiting.") + # exit(1) - branching_commit_hash = branching_commit_output.split(":")[1].split()[0] - logger.info(f"Branching point commit: {branching_commit_hash}") + # branching_commit_hash = branching_commit_output.split(":")[1].split()[0] + # logger.info(f"Branching point commit: {branching_commit_hash}") - logger.info("Fetching the list of changes...") - changes_query = ( - f"descendants({branching_commit_hash}) and ancestors({FIREFOX_VERSION_2})" - ) - changes_output = run_hg_log(changes_query, REPO_DIRECTORY) + # logger.info("Fetching the list of changes...") + # changes_query = ( + # f"descendants({branching_commit_hash}) and ancestors({FIREFOX_VERSION_2})" + # ) + # changes_output = run_hg_log(changes_query, REPO_DIRECTORY) - if not changes_output: - logger.error("Failed to fetch the list of changes. Exiting.") - exit(1) + # if not changes_output: + # logger.error("Failed to fetch the list of changes. Exiting.") + # exit(1) - logger.info("Cleaning commit log...") - keywords_to_remove = [ - "Backed out", - "a=testonly", - "a=release", - "DONTBUILD", - "add tests", - "disable test", - ] - cleaned_commits = clean_commits(changes_output, keywords_to_remove) - cleaned_commits = cleaned_commits[0:40000] + # logger.info("Cleaning commit log...") + # keywords_to_remove = [ + # "Backed out", + # "a=testonly", + # "a=release", + # "DONTBUILD", + # "add tests", + # "disable test", + # ] + # cleaned_commits = clean_commits(changes_output, keywords_to_remove) + # # cleaned_commits = cleaned_commits[0:40000] - logger.info("Generating summaries for cleaned commits...") - summaries = generate_summaries(cleaned_commits) + # logger.info("Generating summaries for cleaned commits...") + # summaries = generate_summaries(cleaned_commits) - combined_list = "\n".join(summaries) + # combined_list = "\n".join(summaries) - logger.info("Removing duplicates from the list...") - combined_list = remove_duplicates(combined_list) + # # logger.info("Removing duplicates from the list...") + # # combined_list = remove_duplicates(combined_list) - logger.info("Removing unworthy commits from the list...") - combined_list = remove_unworthy_commits(combined_list) + # # logger.info("Removing unworthy commits from the list...") + # # combined_list = remove_unworthy_commits(combined_list) - with open(OUTPUT_FILE, "w") as file: - file.write(combined_list) + # with open(OUTPUT_FILE, "w") as file: + # file.write(combined_list) - logger.info(f"Worthy commits saved to {OUTPUT_FILE}") + # logger.info(f"Worthy commits saved to {OUTPUT_FILE}") + with open(OUTPUT_FILE, "r", encoding="utf-8") as file: + file_contents = file.read() + + cleaned_commits = remove_duplicates(file_contents) + cleaned_commits = remove_unworthy_commits(cleaned_commits) + + with open(OUTPUT_FILE, "w", encoding="utf-8") as file: + file.write(cleaned_commits) if __name__ == "__main__": From ce3bdceecf6c4b0c6f9e399617a594c1df5d5612 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 31 Jan 2025 19:38:25 -0500 Subject: [PATCH 07/70] Added extra conversation --- scripts/release_notes_generator.py | 589 ++++++++++++++++------------- 1 file changed, 331 insertions(+), 258 deletions(-) diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py index 13cde398b9..dd4031106d 100644 --- a/scripts/release_notes_generator.py +++ b/scripts/release_notes_generator.py @@ -1,9 +1,9 @@ -import logging +# import logging import os -import re -import subprocess -import tiktoken +# import re +# import subprocess +# import tiktoken from openai import OpenAI MODEL = "gpt-4o" @@ -12,257 +12,330 @@ api_key=os.environ.get("OPENAI_API_KEY"), ) -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -FIREFOX_VERSION_1 = "FIREFOX_BETA_132_BASE" -FIREFOX_VERSION_2 = "FIREFOX_BETA_133_BASE" -REPO_DIRECTORY = "hg_dir" -OUTPUT_FILE = f"version_summary_{FIREFOX_VERSION_2}.txt" -CHUNK_SIZE = 5000 - - -def run_hg_log(query, repo_dir): - try: - result = subprocess.run( - ["hg", "log", "-r", query], - cwd=repo_dir, - capture_output=True, - text=True, - check=True, - ) - return result.stdout.strip() - except subprocess.CalledProcessError as e: - print(f"Error running hg log: {e}") - return None - - -def get_token_count(text, model=MODEL): - encoding = tiktoken.encoding_for_model(model) - return len(encoding.encode(text)) - - -def split_into_chunks(commit_log, chunk_size, model="gpt-4"): - commit_blocks = commit_log.split("\n\n") - chunks = [] - current_chunk = [] - current_token_count = 0 - - for block in commit_blocks: - block_token_count = get_token_count(block, model=model) - - if current_token_count + block_token_count > chunk_size: - chunks.append("\n\n".join(current_chunk)) - current_chunk = [] - current_token_count = 0 - - current_chunk.append(block) - current_token_count += block_token_count - - if current_chunk: - chunks.append("\n\n".join(current_chunk)) - - return chunks - - -def summarize_with_gpt(input_text): - prompt = f""" -You are an expert in analyzing commit logs. Your task is to analyze a chunk of commit logs and produce a summary in a clear and user-friendly format. Follow these steps: - -1. **Analyze Commit Logs**: - - Identify commits or groups of commits relevant for potential release notes. Focus on changes that: - - Are meaningful to **end users**, such as new features, user-facing improvements, or critical updates. - - Exclude: - - Internal refactorings, test-related updates, or minor low-level changes that are not relevant to end users. - - Highly technical details or jargon that might confuse non-developers. - -2. **Enhance Context**: - - If a commit lacks sufficient information (e.g., vague descriptions or unexplained references to functions), break the process into two steps: - - Step 1: Explain why the commit's description is insufficient for end users (e.g., the function's purpose is unclear or its relevance is ambiguous). - - Step 2: Perform a reasoning step where you hypothesize or research the broader context, including the potential impact on security, performance, or user experience. - - Use your analysis to enhance clarity and add relevant context to the description. This ensures that whatever you are adding to the list is actually worthy of being in the release notes, rather than you adding it with no understanding of it. - -3. **Output Format**: - - Use simple, non-technical language suitable for release notes. - - Use the following strict format for each relevant commit: - - [Type of Change] Description of the change (bug XXXXX) (reasoning: ) - - Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc. - -4. **Example Commit Logs**: - - Input: `- [Security] Enforce validateRequestHeaders in HTTP parser (bug 1931456)` - - **Step 1**: Identify insufficient details. "validateRequestHeaders" is unclear without understanding its role in the HTTP parser. - - **Step 2**: Contextual reasoning. This function likely enforces stricter checks on HTTP headers, mitigating potential attack vectors. - - Output: `[Security] Enhanced HTTP request validation by enforcing stricter header checks, reducing the risk of malformed or malicious requests (bug 1931456).` - -5. **Output Strictness**: - - The output must only be the final list, following the specified format. - - Ensure every description is clear, complete, and directly relevant to end users. - -6. **Input**: - Here is the chunk of commit logs you need to focus on: - {input_text} - -7. **Output**: - The output should just be the list. Nothing more and nothing less. -""" - - try: - response = client.chat.completions.create( - messages=[ - { - "role": "user", - "content": prompt, - } - ], - model=MODEL, - temperature=0.1, - ) - return response.choices[0].message.content.strip() - except Exception as e: - logger.error(f"Error while calling OpenAI API: {e}") - return "Error: Unable to generate summary." - - -def remove_duplicates(input_text): - prompt = f"""Given the following list, remove any duplicate entries. That is, if two or more entries talk abou the same change (does not have to be identical wording), remove the less descriptive one. Do not alter anything else. - - Here is the list: - {input_text} - - The output should just be the list with the duplicates removed. Nothing more, nothing less. Do not add any text before or after the list. - """ - - try: - response = client.chat.completions.create( - messages=[ - { - "role": "user", - "content": prompt, - } - ], - model=MODEL, - temperature=0.1, - ) - return response.choices[0].message.content.strip() - except Exception as e: - logger.error(f"Error while calling OpenAI API: {e}") - return "Error: Unable to remove duplicates." - - -def remove_unworthy_commits(input_text): - prompt = f"""Review the following list of release notes and remove anything list entry that is not worthy or necessary for inclusion in official release notes. Focus on keeping only changes that are meaningful, impactful, and directly relevant to end users, such as new features, significant fixes, performance improvements, accessibility enhancements, or critical security updates. Remove anything minor, overly technical, or irrelevant. - -Here is the list: -{input_text} - -Return the cleaned-up list in the same format. Only remove the list entries you do not deem worthy of being included in the release notes. KEEP THE SAME FORMAT, DO NOT ALTER THE ENTRIES THEMSELVES. Do not add any text before or after the list.""" - - try: - response = client.chat.completions.create( - messages=[ - { - "role": "user", - "content": prompt, - } - ], - model=MODEL, - temperature=0.1, - ) - return response.choices[0].message.content.strip() - except Exception as e: - logger.error(f"Error while calling OpenAI API: {e}") - return "Error: Unable to remove unworthy commits." - - -def generate_summaries(commit_log): - chunks = split_into_chunks(commit_log, CHUNK_SIZE) - print(f"LENGTH OF CHUNKS: {len(chunks)}") - print(f"LENGTH OF FIRST CHUNK: {len(chunks[0])}") - summaries = [summarize_with_gpt(chunk) for chunk in chunks] - return summaries - - -def clean_commits(commit_log, keywords): - cleaned_commits = [] - commit_blocks = commit_log.split("\n\n") - - for block in commit_blocks: - if ( - not any( - re.search(rf"\b{keyword}\b", block, re.IGNORECASE) - for keyword in keywords - ) - and re.search(r"Bug \d+", block, re.IGNORECASE) - and not re.search(r"release\+treescript@mozilla\.org", block, re.IGNORECASE) - and not re.search(r"nightly", block, re.IGNORECASE) - ): - match = re.search(r"summary:\s+(.+)", block) - commit_summary = match.group(1) if match else None - cleaned_commits.append(commit_summary) - - return "\n\n".join(cleaned_commits) - - -def generate_worthy_commits(): - # logger.info(f"Generating list of commits for version: {FIREFOX_VERSION_2}") - - # logger.info("Finding the branching point commit...") - # branching_commit_query = f"ancestor({FIREFOX_VERSION_1}, {FIREFOX_VERSION_2})" - # branching_commit_output = run_hg_log(branching_commit_query, REPO_DIRECTORY) - - # if not branching_commit_output: - # logger.error("Failed to find the branching point commit. Exiting.") - # exit(1) - - # branching_commit_hash = branching_commit_output.split(":")[1].split()[0] - # logger.info(f"Branching point commit: {branching_commit_hash}") - - # logger.info("Fetching the list of changes...") - # changes_query = ( - # f"descendants({branching_commit_hash}) and ancestors({FIREFOX_VERSION_2})" - # ) - # changes_output = run_hg_log(changes_query, REPO_DIRECTORY) - - # if not changes_output: - # logger.error("Failed to fetch the list of changes. Exiting.") - # exit(1) - - # logger.info("Cleaning commit log...") - # keywords_to_remove = [ - # "Backed out", - # "a=testonly", - # "a=release", - # "DONTBUILD", - # "add tests", - # "disable test", - # ] - # cleaned_commits = clean_commits(changes_output, keywords_to_remove) - # # cleaned_commits = cleaned_commits[0:40000] - - # logger.info("Generating summaries for cleaned commits...") - # summaries = generate_summaries(cleaned_commits) - - # combined_list = "\n".join(summaries) - - # # logger.info("Removing duplicates from the list...") - # # combined_list = remove_duplicates(combined_list) - - # # logger.info("Removing unworthy commits from the list...") - # # combined_list = remove_unworthy_commits(combined_list) - - # with open(OUTPUT_FILE, "w") as file: - # file.write(combined_list) - - # logger.info(f"Worthy commits saved to {OUTPUT_FILE}") - with open(OUTPUT_FILE, "r", encoding="utf-8") as file: - file_contents = file.read() - - cleaned_commits = remove_duplicates(file_contents) - cleaned_commits = remove_unworthy_commits(cleaned_commits) - - with open(OUTPUT_FILE, "w", encoding="utf-8") as file: - file.write(cleaned_commits) - - -if __name__ == "__main__": - generate_worthy_commits() +# logging.basicConfig(level=logging.INFO) +# logger = logging.getLogger(__name__) + +# FIREFOX_VERSION_1 = "FIREFOX_BETA_132_BASE" +# FIREFOX_VERSION_2 = "FIREFOX_BETA_133_BASE" +# REPO_DIRECTORY = "hg_dir" +# OUTPUT_FILE = f"version_summary_{FIREFOX_VERSION_2}.txt" +# CHUNK_SIZE = 5000 + + +# def run_hg_log(query, repo_dir): +# try: +# result = subprocess.run( +# ["hg", "log", "-r", query], +# cwd=repo_dir, +# capture_output=True, +# text=True, +# check=True, +# ) +# return result.stdout.strip() +# except subprocess.CalledProcessError as e: +# print(f"Error running hg log: {e}") +# return None + + +# def get_token_count(text, model=MODEL): +# encoding = tiktoken.encoding_for_model(model) +# return len(encoding.encode(text)) + + +# def split_into_chunks(commit_log, chunk_size, model="gpt-4"): +# commit_blocks = commit_log.split("\n\n") +# chunks = [] +# current_chunk = [] +# current_token_count = 0 + +# for block in commit_blocks: +# block_token_count = get_token_count(block, model=model) + +# if current_token_count + block_token_count > chunk_size: +# chunks.append("\n\n".join(current_chunk)) +# current_chunk = [] +# current_token_count = 0 + +# current_chunk.append(block) +# current_token_count += block_token_count + +# if current_chunk: +# chunks.append("\n\n".join(current_chunk)) + +# return chunks + + +# def summarize_with_gpt(input_text): +# prompt = f""" +# You are an expert in analyzing commit logs. Your task is to analyze a chunk of commit logs and produce a summary in a clear and user-friendly format. Follow these steps: + +# 1. **Analyze Commit Logs**: +# - Identify commits or groups of commits relevant for potential release notes. Focus on changes that: +# - Are meaningful to **end users**, such as new features, user-facing improvements, or critical updates. +# - Exclude: +# - Internal refactorings, test-related updates, or minor low-level changes that are not relevant to end users. +# - Highly technical details or jargon that might confuse non-developers. + +# 2. **Enhance Context**: +# - If a commit lacks sufficient information (e.g., vague descriptions or unexplained references to functions), break the process into two steps: +# - Step 1: Explain why the commit's description is insufficient for end users (e.g., the function's purpose is unclear or its relevance is ambiguous). +# - Step 2: Perform a reasoning step where you hypothesize or research the broader context, including the potential impact on security, performance, or user experience. +# - Use your analysis to enhance clarity and add relevant context to the description. This ensures that whatever you are adding to the list is actually worthy of being in the release notes, rather than you adding it with no understanding of it. + +# 3. **Output Format**: +# - Use simple, non-technical language suitable for release notes. +# - Use the following strict format for each relevant commit, in CSV FORMAT: +# [Type of Change],Description of the change,Bug XXXX,Reasoning behind the change (if necessary) +# - Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc. + +# 4. **Output Strictness**: +# - The output must only be the final list, following the specified format. +# - Ensure every description is clear, complete, and directly relevant to end users. + +# 6. **Input**: +# Here is the chunk of commit logs you need to focus on: +# {input_text} + +# 7. **Output**: +# The output should just be the list. Nothing more and nothing less. +# """ + +# try: +# response = client.chat.completions.create( +# messages=[ +# { +# "role": "user", +# "content": prompt, +# } +# ], +# model=MODEL, +# temperature=0.1, +# ) +# return response.choices[0].message.content.strip() +# except Exception as e: +# logger.error(f"Error while calling OpenAI API: {e}") +# return "Error: Unable to generate summary." + + +# def remove_duplicates(input_text): +# prompt = f"""Given the following list, remove any duplicate entries. That is, if two or more entries talk abou the same change (does not have to be identical wording), remove the less descriptive one. Do not alter anything else. + +# Here is the list: +# {input_text} + +# The output should just be the list with the duplicates removed. Nothing more, nothing less. Do not add any text before or after the list. +# """ + +# try: +# response = client.chat.completions.create( +# messages=[ +# { +# "role": "user", +# "content": prompt, +# } +# ], +# model=MODEL, +# temperature=0.1, +# ) +# return response.choices[0].message.content.strip() +# except Exception as e: +# logger.error(f"Error while calling OpenAI API: {e}") +# return "Error: Unable to remove duplicates." + + +# def remove_unworthy_commits(input_text): +# prompt = f"""Review the following list of release notes and remove anything list entry that is not worthy or necessary for inclusion in official release notes. Focus on keeping only changes that are meaningful, impactful, and directly relevant to end users, such as new features, significant fixes, performance improvements, accessibility enhancements, or critical security updates. Remove anything minor, overly technical, or irrelevant. + +# Here is the list: +# {input_text} + +# Return the cleaned-up list in the same format. Only remove the list entries you do not deem worthy of being included in the release notes. KEEP THE SAME FORMAT, DO NOT ALTER THE ENTRIES THEMSELVES. Do not add any text before or after the list.""" + +# try: +# response = client.chat.completions.create( +# messages=[ +# { +# "role": "user", +# "content": prompt, +# } +# ], +# model=MODEL, +# temperature=0.1, +# ) +# return response.choices[0].message.content.strip() +# except Exception as e: +# logger.error(f"Error while calling OpenAI API: {e}") +# return "Error: Unable to remove unworthy commits." + + +# def generate_summaries(commit_log): +# chunks = split_into_chunks(commit_log, CHUNK_SIZE) +# print(f"LENGTH OF CHUNKS: {len(chunks)}") +# print(f"LENGTH OF FIRST CHUNK: {len(chunks[0])}") +# summaries = [summarize_with_gpt(chunk) for chunk in chunks] +# return summaries + + +# def clean_commits(commit_log, keywords): +# cleaned_commits = [] +# commit_blocks = commit_log.split("\n\n") + +# for block in commit_blocks: +# if ( +# not any( +# re.search(rf"\b{keyword}\b", block, re.IGNORECASE) +# for keyword in keywords +# ) +# and re.search(r"Bug \d+", block, re.IGNORECASE) +# and not re.search(r"release\+treescript@mozilla\.org", block, re.IGNORECASE) +# and not re.search(r"nightly", block, re.IGNORECASE) +# ): +# match = re.search(r"summary:\s+(.+)", block) +# commit_summary = match.group(1) if match else None +# cleaned_commits.append(commit_summary) + +# return "\n\n".join(cleaned_commits) + + +# def generate_worthy_commits(): +# logger.info(f"Generating list of commits for version: {FIREFOX_VERSION_2}") + +# logger.info("Finding the branching point commit...") +# branching_commit_query = f"ancestor({FIREFOX_VERSION_1}, {FIREFOX_VERSION_2})" +# branching_commit_output = run_hg_log(branching_commit_query, REPO_DIRECTORY) + +# if not branching_commit_output: +# logger.error("Failed to find the branching point commit. Exiting.") +# exit(1) + +# branching_commit_hash = branching_commit_output.split(":")[1].split()[0] +# logger.info(f"Branching point commit: {branching_commit_hash}") + +# logger.info("Fetching the list of changes...") +# changes_query = ( +# f"descendants({branching_commit_hash}) and ancestors({FIREFOX_VERSION_2})" +# ) +# changes_output = run_hg_log(changes_query, REPO_DIRECTORY) + +# if not changes_output: +# logger.error("Failed to fetch the list of changes. Exiting.") +# exit(1) + +# logger.info("Cleaning commit log...") +# keywords_to_remove = [ +# "Backed out", +# "a=testonly", +# "a=release", +# "DONTBUILD", +# "add tests", +# "disable test", +# ] +# cleaned_commits = clean_commits(changes_output, keywords_to_remove) +# # cleaned_commits = cleaned_commits[0:40000] + +# logger.info("Generating summaries for cleaned commits...") +# summaries = generate_summaries(cleaned_commits) + +# combined_list = "\n".join(summaries) + +# # logger.info("Removing duplicates from the list...") +# # combined_list = remove_duplicates(combined_list) + +# # logger.info("Removing unworthy commits from the list...") +# # combined_list = remove_unworthy_commits(combined_list) + +# with open(OUTPUT_FILE, "w") as file: +# file.write(combined_list) + +# logger.info(f"Worthy commits saved to {OUTPUT_FILE}") +# # with open(OUTPUT_FILE, "r", encoding="utf-8") as file: +# # file_contents = file.read() + +# # cleaned_commits = remove_duplicates(file_contents) +# # cleaned_commits = remove_unworthy_commits(cleaned_commits) + +# # with open(OUTPUT_FILE, "w", encoding="utf-8") as file: +# # file.write(cleaned_commits) + + +# if __name__ == "__main__": +# generate_worthy_commits() + + +# import openai + +# MODEL = "gpt-4o" + +# # Pre-existing conversation + +# message1 = """You are a software developer who has to change the code below by following a given Code Review. +# The Code Review is attached to the line of code starting with the line number Start_Line and +# ending with the line number End_Line. There are also characters (- and +) showing where a line +# of code in the diff hunk has been removed (marked with a - at the beginning of the line) or added +# (marked with a + at the beginning of the line). The New Code Diff should be in the correct Git diff +# format, where added lines (on top of the diff hunk) are denoted with the + character. Lines removed +# from the Diff Hunk should be denoted with the - character. Your output must not contain any trailing +# tokens/characters. Your output must adhere to the following format: "Short Explanation: [...] + +# New Code Diff: [...]" + +# Start_Line: +# 1469 + +# End_Line: +# 1470 + +# Code Review: +# {'raw': 'Please fix this warning while you are here.'} + +# Diff Hunk: + +# Code Review: +# {comment_content} + +# Diff Hunk: +# ``` +# diff -u b/xpfe/appshell/AppWindow.cpp b/xpfe/appshell/AppWindow.cpp +# --- b/xpfe/appshell/AppWindow.cpp +# +++ b/xpfe/appshell/AppWindow.cpp +# @@ -1466,8 +1466,9 @@ +# nsresult errorCode; +# int32_t zLevel = stateString.ToInteger(&errorCode); +# if (NS_SUCCEEDED(errorCode) && zLevel >= int32_t(lowestZ) && +# - zLevel <= int32_t(highestZ)) +# + zLevel <= int32_t(highestZ)) { +# SetZLevel(zLevel); +# + } +# } + +# return gotState; +# ``` +# """ + +# message2 = """('Short Explanation: The code review suggests fixing a warning related to type conversion by explicitly casting `lowestZ` and `highestZ` to `int32_t`.\n\nNew Code Diff:\n```diff\ndiff --git a/xpfe/appshell/AppWindow.cpp b/xpfe/appshell/AppWindow.cpp\n--- a/xpfe/appshell/AppWindow.cpp\n+++ b/xpfe/appshell/AppWindow.cpp\n@@ -1463,11 +1463,12 @@\n // zlevel\n windowElement->GetAttribute(ZLEVEL_ATTRIBUTE, stateString);\n if (!stateString.IsEmpty()) {\n nsresult errorCode;\n int32_t zLevel = stateString.ToInteger(&errorCode);\n- if (NS_SUCCEEDED(errorCode) && zLevel >= lowestZ && zLevel <= highestZ)\n+ if (NS_SUCCEEDED(errorCode) && zLevel >= int32_t(lowestZ) &&\n+ zLevel <= int32_t(highestZ))\n SetZLevel(zLevel);\n }\n \n return gotState;\n }\n```', '\n You are a software developer who has to change the code below by following a given Code Review.\n The Code Review is attached to the line of code starting with the line number Start_Line and\n ending with the line number End_Line. There are also characters (- and +) showing where a line\n of code in the diff hunk has been removed (marked with a - at the beginning of the line) or added\n (marked with a + at the beginning of the line). The New Code Diff should be in the correct Git diff\n format, where added lines (on top of the diff hunk) are denoted with the + character. Lines removed\n from the Diff Hunk should be denoted with the - character. Your output must not contain any trailing\n tokens/characters. Your output must adhere to the following format: "Short Explanation: [...] \n\n New Code Diff: [...]"\n\n Start_Line:\n 1469\n\n End_Line:\n 1470\n\n Code Review:\n {\'raw\': \'Please fix this warning while you are here.\'}\n\n Diff Hunk:\n ```\n diff --git a/xpfe/appshell/AppWindow.cpp b/xpfe/appshell/AppWindow.cpp\n--- a/xpfe/appshell/AppWindow.cpp\n+++ b/xpfe/appshell/AppWindow.cpp\n@@ -1463,11 +1463,12 @@\n // zlevel\n windowElement->GetAttribute(ZLEVEL_ATTRIBUTE, stateString);\n if (!stateString.IsEmpty()) {\n nsresult errorCode;\n int32_t zLevel = stateString.ToInteger(&errorCode);\n- if (NS_SUCCEEDED(errorCode) && zLevel >= lowestZ && zLevel <= highestZ)\n+ if (NS_SUCCEEDED(errorCode) && zLevel >= int32_t(lowestZ) &&\n+ zLevel <= int32_t(highestZ))\n SetZLevel(zLevel);\n }\n \n return gotState;\n }\n\n ```\n ') +# """ + +# messages = [ +# {"role": "user", "content": message1}, +# {"role": "assistant", "content": message2} +# ] + +# def chat(prompt): +# global messages + +# messages.append({"role": "user", "content": prompt}) + +# response = client.chat.completions.create( +# messages=messages, +# model=MODEL, +# temperature=0.1, +# ) +# return response.choices[0].message.content.strip() + +# # assistant_message = response["choices"][0]["message"]["content"].strip() + +# # messages.append({"role": "assistant", "content": assistant_message}) + +# # return assistant_message + +# print(chat("How did you know that this was the fix?")) From bc1dd9cf0e53f98d281e822e9656394d113e5b43 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Wed, 5 Feb 2025 10:35:38 -0500 Subject: [PATCH 08/70] New prompt --- scripts/release_notes_generator.py | 531 +++++++++++++++-------------- 1 file changed, 279 insertions(+), 252 deletions(-) diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py index dd4031106d..a2ed00998d 100644 --- a/scripts/release_notes_generator.py +++ b/scripts/release_notes_generator.py @@ -1,9 +1,9 @@ -# import logging +import logging import os +import re +import subprocess -# import re -# import subprocess -# import tiktoken +import tiktoken from openai import OpenAI MODEL = "gpt-4o" @@ -12,254 +12,281 @@ api_key=os.environ.get("OPENAI_API_KEY"), ) -# logging.basicConfig(level=logging.INFO) -# logger = logging.getLogger(__name__) - -# FIREFOX_VERSION_1 = "FIREFOX_BETA_132_BASE" -# FIREFOX_VERSION_2 = "FIREFOX_BETA_133_BASE" -# REPO_DIRECTORY = "hg_dir" -# OUTPUT_FILE = f"version_summary_{FIREFOX_VERSION_2}.txt" -# CHUNK_SIZE = 5000 - - -# def run_hg_log(query, repo_dir): -# try: -# result = subprocess.run( -# ["hg", "log", "-r", query], -# cwd=repo_dir, -# capture_output=True, -# text=True, -# check=True, -# ) -# return result.stdout.strip() -# except subprocess.CalledProcessError as e: -# print(f"Error running hg log: {e}") -# return None - - -# def get_token_count(text, model=MODEL): -# encoding = tiktoken.encoding_for_model(model) -# return len(encoding.encode(text)) - - -# def split_into_chunks(commit_log, chunk_size, model="gpt-4"): -# commit_blocks = commit_log.split("\n\n") -# chunks = [] -# current_chunk = [] -# current_token_count = 0 - -# for block in commit_blocks: -# block_token_count = get_token_count(block, model=model) - -# if current_token_count + block_token_count > chunk_size: -# chunks.append("\n\n".join(current_chunk)) -# current_chunk = [] -# current_token_count = 0 - -# current_chunk.append(block) -# current_token_count += block_token_count - -# if current_chunk: -# chunks.append("\n\n".join(current_chunk)) - -# return chunks - - -# def summarize_with_gpt(input_text): -# prompt = f""" -# You are an expert in analyzing commit logs. Your task is to analyze a chunk of commit logs and produce a summary in a clear and user-friendly format. Follow these steps: - -# 1. **Analyze Commit Logs**: -# - Identify commits or groups of commits relevant for potential release notes. Focus on changes that: -# - Are meaningful to **end users**, such as new features, user-facing improvements, or critical updates. -# - Exclude: -# - Internal refactorings, test-related updates, or minor low-level changes that are not relevant to end users. -# - Highly technical details or jargon that might confuse non-developers. - -# 2. **Enhance Context**: -# - If a commit lacks sufficient information (e.g., vague descriptions or unexplained references to functions), break the process into two steps: -# - Step 1: Explain why the commit's description is insufficient for end users (e.g., the function's purpose is unclear or its relevance is ambiguous). -# - Step 2: Perform a reasoning step where you hypothesize or research the broader context, including the potential impact on security, performance, or user experience. -# - Use your analysis to enhance clarity and add relevant context to the description. This ensures that whatever you are adding to the list is actually worthy of being in the release notes, rather than you adding it with no understanding of it. - -# 3. **Output Format**: -# - Use simple, non-technical language suitable for release notes. -# - Use the following strict format for each relevant commit, in CSV FORMAT: -# [Type of Change],Description of the change,Bug XXXX,Reasoning behind the change (if necessary) -# - Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc. - -# 4. **Output Strictness**: -# - The output must only be the final list, following the specified format. -# - Ensure every description is clear, complete, and directly relevant to end users. - -# 6. **Input**: -# Here is the chunk of commit logs you need to focus on: -# {input_text} - -# 7. **Output**: -# The output should just be the list. Nothing more and nothing less. -# """ - -# try: -# response = client.chat.completions.create( -# messages=[ -# { -# "role": "user", -# "content": prompt, -# } -# ], -# model=MODEL, -# temperature=0.1, -# ) -# return response.choices[0].message.content.strip() -# except Exception as e: -# logger.error(f"Error while calling OpenAI API: {e}") -# return "Error: Unable to generate summary." - - -# def remove_duplicates(input_text): -# prompt = f"""Given the following list, remove any duplicate entries. That is, if two or more entries talk abou the same change (does not have to be identical wording), remove the less descriptive one. Do not alter anything else. - -# Here is the list: -# {input_text} - -# The output should just be the list with the duplicates removed. Nothing more, nothing less. Do not add any text before or after the list. -# """ - -# try: -# response = client.chat.completions.create( -# messages=[ -# { -# "role": "user", -# "content": prompt, -# } -# ], -# model=MODEL, -# temperature=0.1, -# ) -# return response.choices[0].message.content.strip() -# except Exception as e: -# logger.error(f"Error while calling OpenAI API: {e}") -# return "Error: Unable to remove duplicates." - - -# def remove_unworthy_commits(input_text): -# prompt = f"""Review the following list of release notes and remove anything list entry that is not worthy or necessary for inclusion in official release notes. Focus on keeping only changes that are meaningful, impactful, and directly relevant to end users, such as new features, significant fixes, performance improvements, accessibility enhancements, or critical security updates. Remove anything minor, overly technical, or irrelevant. - -# Here is the list: -# {input_text} - -# Return the cleaned-up list in the same format. Only remove the list entries you do not deem worthy of being included in the release notes. KEEP THE SAME FORMAT, DO NOT ALTER THE ENTRIES THEMSELVES. Do not add any text before or after the list.""" - -# try: -# response = client.chat.completions.create( -# messages=[ -# { -# "role": "user", -# "content": prompt, -# } -# ], -# model=MODEL, -# temperature=0.1, -# ) -# return response.choices[0].message.content.strip() -# except Exception as e: -# logger.error(f"Error while calling OpenAI API: {e}") -# return "Error: Unable to remove unworthy commits." - - -# def generate_summaries(commit_log): -# chunks = split_into_chunks(commit_log, CHUNK_SIZE) -# print(f"LENGTH OF CHUNKS: {len(chunks)}") -# print(f"LENGTH OF FIRST CHUNK: {len(chunks[0])}") -# summaries = [summarize_with_gpt(chunk) for chunk in chunks] -# return summaries - - -# def clean_commits(commit_log, keywords): -# cleaned_commits = [] -# commit_blocks = commit_log.split("\n\n") - -# for block in commit_blocks: -# if ( -# not any( -# re.search(rf"\b{keyword}\b", block, re.IGNORECASE) -# for keyword in keywords -# ) -# and re.search(r"Bug \d+", block, re.IGNORECASE) -# and not re.search(r"release\+treescript@mozilla\.org", block, re.IGNORECASE) -# and not re.search(r"nightly", block, re.IGNORECASE) -# ): -# match = re.search(r"summary:\s+(.+)", block) -# commit_summary = match.group(1) if match else None -# cleaned_commits.append(commit_summary) - -# return "\n\n".join(cleaned_commits) - - -# def generate_worthy_commits(): -# logger.info(f"Generating list of commits for version: {FIREFOX_VERSION_2}") - -# logger.info("Finding the branching point commit...") -# branching_commit_query = f"ancestor({FIREFOX_VERSION_1}, {FIREFOX_VERSION_2})" -# branching_commit_output = run_hg_log(branching_commit_query, REPO_DIRECTORY) - -# if not branching_commit_output: -# logger.error("Failed to find the branching point commit. Exiting.") -# exit(1) - -# branching_commit_hash = branching_commit_output.split(":")[1].split()[0] -# logger.info(f"Branching point commit: {branching_commit_hash}") - -# logger.info("Fetching the list of changes...") -# changes_query = ( -# f"descendants({branching_commit_hash}) and ancestors({FIREFOX_VERSION_2})" -# ) -# changes_output = run_hg_log(changes_query, REPO_DIRECTORY) - -# if not changes_output: -# logger.error("Failed to fetch the list of changes. Exiting.") -# exit(1) - -# logger.info("Cleaning commit log...") -# keywords_to_remove = [ -# "Backed out", -# "a=testonly", -# "a=release", -# "DONTBUILD", -# "add tests", -# "disable test", -# ] -# cleaned_commits = clean_commits(changes_output, keywords_to_remove) -# # cleaned_commits = cleaned_commits[0:40000] - -# logger.info("Generating summaries for cleaned commits...") -# summaries = generate_summaries(cleaned_commits) - -# combined_list = "\n".join(summaries) - -# # logger.info("Removing duplicates from the list...") -# # combined_list = remove_duplicates(combined_list) - -# # logger.info("Removing unworthy commits from the list...") -# # combined_list = remove_unworthy_commits(combined_list) - -# with open(OUTPUT_FILE, "w") as file: -# file.write(combined_list) - -# logger.info(f"Worthy commits saved to {OUTPUT_FILE}") -# # with open(OUTPUT_FILE, "r", encoding="utf-8") as file: -# # file_contents = file.read() - -# # cleaned_commits = remove_duplicates(file_contents) -# # cleaned_commits = remove_unworthy_commits(cleaned_commits) - -# # with open(OUTPUT_FILE, "w", encoding="utf-8") as file: -# # file.write(cleaned_commits) - - -# if __name__ == "__main__": -# generate_worthy_commits() +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +FIREFOX_VERSION_1 = "FIREFOX_BETA_132_BASE" +FIREFOX_VERSION_2 = "FIREFOX_BETA_133_BASE" +REPO_DIRECTORY = "hg_dir" +OUTPUT_FILE = f"version_summary_{FIREFOX_VERSION_2}.txt" +CHUNK_SIZE = 5000 + + +def run_hg_log(query, repo_dir): + try: + result = subprocess.run( + ["hg", "log", "-r", query], + cwd=repo_dir, + capture_output=True, + text=True, + check=True, + ) + return result.stdout.strip() + except subprocess.CalledProcessError as e: + print(f"Error running hg log: {e}") + return None + + +def get_token_count(text, model=MODEL): + encoding = tiktoken.encoding_for_model(model) + return len(encoding.encode(text)) + + +def split_into_chunks(commit_log, chunk_size, model="gpt-4"): + commit_blocks = commit_log.split("\n\n") + chunks = [] + current_chunk = [] + current_token_count = 0 + + for block in commit_blocks: + block_token_count = get_token_count(block, model=model) + + if current_token_count + block_token_count > chunk_size: + chunks.append("\n\n".join(current_chunk)) + current_chunk = [] + current_token_count = 0 + + current_chunk.append(block) + current_token_count += block_token_count + + if current_chunk: + chunks.append("\n\n".join(current_chunk)) + + return chunks + + +def summarize_with_gpt(input_text): + # prompt = f""" + # You are an expert in analyzing commit logs. Your task is to analyze a chunk of commit logs and produce a summary in a clear and user-friendly format. Follow these steps: + + # 1. **Analyze Commit Logs**: + # - Identify commits or groups of commits relevant for potential release notes. Focus on changes that: + # - Are meaningful to **end users**, such as new features, user-facing improvements, or critical updates. + # - Exclude: + # - Internal refactorings, test-related updates, or minor low-level changes that are not relevant to end users. + # - Highly technical details or jargon that might confuse non-developers. + + # 2. **Enhance Context**: + # - If a commit lacks sufficient information (e.g., vague descriptions or unexplained references to functions), break the process into two steps: + # - Step 1: Explain why the commit's description is insufficient for end users (e.g., the function's purpose is unclear or its relevance is ambiguous). + # - Step 2: Perform a reasoning step where you hypothesize or research the broader context, including the potential impact on security, performance, or user experience. + # - Use your analysis to enhance clarity and add relevant context to the description. This ensures that whatever you are adding to the list is actually worthy of being in the release notes, rather than you adding it with no understanding of it. + + # 3. **Output Format**: + # - Use simple, non-technical language suitable for release notes. + # - Use the following strict format for each relevant commit, in CSV FORMAT: + # [Type of Change],Description of the change,Bug XXXX,Reasoning behind the change (if necessary) + # - Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc. + + # 4. **Output Strictness**: + # - The output must only be the final list, following the specified format. + # - Ensure every description is clear, complete, and directly relevant to end users. + + # 6. **Input**: + # Here is the chunk of commit logs you need to focus on: + # {input_text} + + # 7. **Output**: + # The output should just be the list. Nothing more and nothing less. + # """ + prompt = f""" +You are an expert in analyzing commit logs. Your task is to analyze a chunk of commit logs and produce a summary in a clear and user-friendly format. Follow these steps: + +1. **Analyze Commit Logs**: + - Identify commits or groups of commits relevant for potential release notes. Focus on changes that: + - Are meaningful to **end users**, such as new features, user-facing improvements, or critical updates. + - Exclude: + - Internal refactorings, test-related updates, or minor low-level changes that are not relevant to end users. + - Highly technical details or jargon that might confuse non-developers. + +2. **Output Format**: + - Use simple, non-technical language suitable for release notes. + - Use the following strict format for each relevant commit, in CSV FORMAT: +[Type of Change],Description of the change,Bug XXXX,Reason why the change is impactful for end users + - Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc. + +3. **Output Strictness**: + - The output must only be the final list, following the specified format. + - Ensure every description is clear, complete, and directly relevant to end users. + +4. **Input**: + Here is the chunk of commit logs you need to focus on: + {input_text} + +5. **Output**: + The output should just be the list. Nothing more and nothing less. +""" + + try: + response = client.chat.completions.create( + messages=[ + { + "role": "user", + "content": prompt, + } + ], + model=MODEL, + temperature=0.1, + ) + return response.choices[0].message.content.strip() + except Exception as e: + logger.error(f"Error while calling OpenAI API: {e}") + return "Error: Unable to generate summary." + + +def remove_duplicates(input_text): + prompt = f"""Given the following list, remove any duplicate entries. That is, if two or more entries talk abou the same change (does not have to be identical wording), remove the less descriptive one. Do not alter anything else. + + Here is the list: + {input_text} + + The output should just be the list with the duplicates removed. Nothing more, nothing less. Do not add any text before or after the list. + """ + + try: + response = client.chat.completions.create( + messages=[ + { + "role": "user", + "content": prompt, + } + ], + model=MODEL, + temperature=0.1, + ) + return response.choices[0].message.content.strip() + except Exception as e: + logger.error(f"Error while calling OpenAI API: {e}") + return "Error: Unable to remove duplicates." + + +def remove_unworthy_commits(input_text): + prompt = f"""Review the following list of release notes and remove anything list entry that is not worthy or necessary for inclusion in official release notes. Focus on keeping only changes that are meaningful, impactful, and directly relevant to end users, such as new features, significant fixes, performance improvements, accessibility enhancements, or critical security updates. Remove anything minor, overly technical, or irrelevant. + +Here is the list: +{input_text} + +Return the cleaned-up list in the same format. Only remove the list entries you do not deem worthy of being included in the release notes. KEEP THE SAME FORMAT, DO NOT ALTER THE ENTRIES THEMSELVES. Do not add any text before or after the list.""" + + try: + response = client.chat.completions.create( + messages=[ + { + "role": "user", + "content": prompt, + } + ], + model=MODEL, + temperature=0.1, + ) + return response.choices[0].message.content.strip() + except Exception as e: + logger.error(f"Error while calling OpenAI API: {e}") + return "Error: Unable to remove unworthy commits." + + +def generate_summaries(commit_log): + chunks = split_into_chunks(commit_log, CHUNK_SIZE) + print(f"LENGTH OF CHUNKS: {len(chunks)}") + print(f"LENGTH OF FIRST CHUNK: {len(chunks[0])}") + summaries = [summarize_with_gpt(chunk) for chunk in chunks] + return summaries + + +def clean_commits(commit_log, keywords): + cleaned_commits = [] + commit_blocks = commit_log.split("\n\n") + + for block in commit_blocks: + if ( + not any( + re.search(rf"\b{keyword}\b", block, re.IGNORECASE) + for keyword in keywords + ) + and re.search(r"Bug \d+", block, re.IGNORECASE) + and not re.search(r"release\+treescript@mozilla\.org", block, re.IGNORECASE) + and not re.search(r"nightly", block, re.IGNORECASE) + ): + match = re.search(r"summary:\s+(.+)", block) + commit_summary = match.group(1) if match else None + cleaned_commits.append(commit_summary) + + return "\n\n".join(cleaned_commits) + + +def generate_worthy_commits(): + logger.info(f"Generating list of commits for version: {FIREFOX_VERSION_2}") + + logger.info("Finding the branching point commit...") + branching_commit_query = f"ancestor({FIREFOX_VERSION_1}, {FIREFOX_VERSION_2})" + branching_commit_output = run_hg_log(branching_commit_query, REPO_DIRECTORY) + + if not branching_commit_output: + logger.error("Failed to find the branching point commit. Exiting.") + exit(1) + + branching_commit_hash = branching_commit_output.split(":")[1].split()[0] + logger.info(f"Branching point commit: {branching_commit_hash}") + + logger.info("Fetching the list of changes...") + changes_query = ( + f"descendants({branching_commit_hash}) and ancestors({FIREFOX_VERSION_2})" + ) + changes_output = run_hg_log(changes_query, REPO_DIRECTORY) + + if not changes_output: + logger.error("Failed to fetch the list of changes. Exiting.") + exit(1) + + logger.info("Cleaning commit log...") + keywords_to_remove = [ + "Backed out", + "a=testonly", + "a=release", + "DONTBUILD", + "add tests", + "disable test", + ] + cleaned_commits = clean_commits(changes_output, keywords_to_remove) + # cleaned_commits = cleaned_commits[0:40000] + + logger.info("Generating summaries for cleaned commits...") + summaries = generate_summaries(cleaned_commits) + + combined_list = "\n".join(summaries) + + # logger.info("Removing duplicates from the list...") + # combined_list = remove_duplicates(combined_list) + + # logger.info("Removing unworthy commits from the list...") + # combined_list = remove_unworthy_commits(combined_list) + + with open(OUTPUT_FILE, "w") as file: + file.write(combined_list) + + logger.info(f"Worthy commits saved to {OUTPUT_FILE}") + # with open(OUTPUT_FILE, "r", encoding="utf-8") as file: + # file_contents = file.read() + + # cleaned_commits = remove_duplicates(file_contents) + # cleaned_commits = remove_unworthy_commits(cleaned_commits) + + # with open(OUTPUT_FILE, "w", encoding="utf-8") as file: + # file.write(cleaned_commits) + + +if __name__ == "__main__": + generate_worthy_commits() # import openai From 6ba5b88c17d8fe5bba6e389128676c3e1163bbf7 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Wed, 5 Feb 2025 12:44:39 -0500 Subject: [PATCH 09/70] Changed prompt --- scripts/release_notes_generator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py index a2ed00998d..7d85fde3a0 100644 --- a/scripts/release_notes_generator.py +++ b/scripts/release_notes_generator.py @@ -100,7 +100,7 @@ def summarize_with_gpt(input_text): # The output should just be the list. Nothing more and nothing less. # """ prompt = f""" -You are an expert in analyzing commit logs. Your task is to analyze a chunk of commit logs and produce a summary in a clear and user-friendly format. Follow these steps: +You are an expert in writing Firefox release notes. Your task is to analyze a list of commits and identify important user-facing changes. Follow these steps: 1. **Analyze Commit Logs**: - Identify commits or groups of commits relevant for potential release notes. Focus on changes that: @@ -199,7 +199,9 @@ def generate_summaries(commit_log): chunks = split_into_chunks(commit_log, CHUNK_SIZE) print(f"LENGTH OF CHUNKS: {len(chunks)}") print(f"LENGTH OF FIRST CHUNK: {len(chunks[0])}") - summaries = [summarize_with_gpt(chunk) for chunk in chunks] + # summaries = [summarize_with_gpt(chunk) for chunk in chunks] + + summaries = [summarize_with_gpt(chunks[0])] return summaries From be742b1e958a600417fd2513ba2f0cc21786d326 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 7 Feb 2025 12:11:06 -0500 Subject: [PATCH 10/70] Made prompt more strict --- scripts/release_notes_generator.py | 39 +++++++++++++++++++----------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py index 7d85fde3a0..2952011b67 100644 --- a/scripts/release_notes_generator.py +++ b/scripts/release_notes_generator.py @@ -52,6 +52,7 @@ def split_into_chunks(commit_log, chunk_size, model="gpt-4"): block_token_count = get_token_count(block, model=model) if current_token_count + block_token_count > chunk_size: + print(f"number of blocks in chunk: {len(current_chunk)}") chunks.append("\n\n".join(current_chunk)) current_chunk = [] current_token_count = 0 @@ -102,12 +103,18 @@ def summarize_with_gpt(input_text): prompt = f""" You are an expert in writing Firefox release notes. Your task is to analyze a list of commits and identify important user-facing changes. Follow these steps: -1. **Analyze Commit Logs**: - - Identify commits or groups of commits relevant for potential release notes. Focus on changes that: - - Are meaningful to **end users**, such as new features, user-facing improvements, or critical updates. - - Exclude: - - Internal refactorings, test-related updates, or minor low-level changes that are not relevant to end users. - - Highly technical details or jargon that might confuse non-developers. +1. **Must Include Only Meaningful Changes**: + - Only keep commits that significantly impact users, such as: + - New features + - UI changes + - Major performance improvements + - Security patches + - Web platform changes that affect how websites behave + - DO NOT include: + - Small bug fixes unless critical + - Internal code refactoring + - Test changes or documentation updates + - Developer tooling or CI/CD pipeline changes 2. **Output Format**: - Use simple, non-technical language suitable for release notes. @@ -119,11 +126,15 @@ def summarize_with_gpt(input_text): - The output must only be the final list, following the specified format. - Ensure every description is clear, complete, and directly relevant to end users. -4. **Input**: +4. **Be Aggressive in Filtering**: +- If you're unsure whether a commit impacts end users, EXCLUDE it. +- Do not list developer-focused changes. + +5. **Input**: Here is the chunk of commit logs you need to focus on: {input_text} -5. **Output**: +6. **Output**: The output should just be the list. Nothing more and nothing less. """ @@ -199,9 +210,9 @@ def generate_summaries(commit_log): chunks = split_into_chunks(commit_log, CHUNK_SIZE) print(f"LENGTH OF CHUNKS: {len(chunks)}") print(f"LENGTH OF FIRST CHUNK: {len(chunks[0])}") - # summaries = [summarize_with_gpt(chunk) for chunk in chunks] + summaries = [summarize_with_gpt(chunk) for chunk in chunks] - summaries = [summarize_with_gpt(chunks[0])] + # summaries = [summarize_with_gpt(chunks[0])] return summaries @@ -267,11 +278,11 @@ def generate_worthy_commits(): combined_list = "\n".join(summaries) - # logger.info("Removing duplicates from the list...") - # combined_list = remove_duplicates(combined_list) + logger.info("Removing duplicates from the list...") + combined_list = remove_duplicates(combined_list) - # logger.info("Removing unworthy commits from the list...") - # combined_list = remove_unworthy_commits(combined_list) + logger.info("Removing unworthy commits from the list...") + combined_list = remove_unworthy_commits(combined_list) with open(OUTPUT_FILE, "w") as file: file.write(combined_list) From 1dacebf291e4a35d51b52cd88f3156825f991806 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 10 Feb 2025 14:46:06 -0500 Subject: [PATCH 11/70] Fixed prompt and increased chunk size --- scripts/release_notes_generator.py | 133 +++++++++-------------------- 1 file changed, 38 insertions(+), 95 deletions(-) diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py index 2952011b67..22adf3a7cf 100644 --- a/scripts/release_notes_generator.py +++ b/scripts/release_notes_generator.py @@ -19,7 +19,7 @@ FIREFOX_VERSION_2 = "FIREFOX_BETA_133_BASE" REPO_DIRECTORY = "hg_dir" OUTPUT_FILE = f"version_summary_{FIREFOX_VERSION_2}.txt" -CHUNK_SIZE = 5000 +CHUNK_SIZE = 10000 def run_hg_log(query, repo_dir): @@ -122,20 +122,25 @@ def summarize_with_gpt(input_text): [Type of Change],Description of the change,Bug XXXX,Reason why the change is impactful for end users - Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc. -3. **Output Strictness**: - - The output must only be the final list, following the specified format. - - Ensure every description is clear, complete, and directly relevant to end users. +3. **Bad Example (DO NOT FOLLOW)**: +[Feature],Enable async FlushRendering during resizing window if Windows DirectComposition is used,Bug 1922721,Improves performance and responsiveness when resizing windows on systems using Windows DirectComposition. +We should exclude this change because it contains technical jargon that is unclear to general users, making it difficult to understand. Additionally, the impact is limited to a specific subset of Windows users with DirectComposition enabled, and the improvement is not significant enough to be noteworthy in the release notes. 4. **Be Aggressive in Filtering**: -- If you're unsure whether a commit impacts end users, EXCLUDE it. -- Do not list developer-focused changes. + - If you're unsure whether a commit impacts end users, **EXCLUDE it**. + - Do **not** list developer-focused changes. -5. **Input**: +5. **Select Only the Top 5 Commits**: + - If there are more than 5 relevant commits, choose the **most impactful ones**. + +6. **Input**: Here is the chunk of commit logs you need to focus on: {input_text} -6. **Output**: - The output should just be the list. Nothing more and nothing less. +7. **Output Requirements**: + - Output must be raw CSV text—no formatting, no extra text. + - Do not wrap the output in triple backticks (` ``` `) or use markdown formatting. + - Do not include the words "CSV" or any headers—just the data. """ try: @@ -182,13 +187,30 @@ def remove_duplicates(input_text): def remove_unworthy_commits(input_text): - prompt = f"""Review the following list of release notes and remove anything list entry that is not worthy or necessary for inclusion in official release notes. Focus on keeping only changes that are meaningful, impactful, and directly relevant to end users, such as new features, significant fixes, performance improvements, accessibility enhancements, or critical security updates. Remove anything minor, overly technical, or irrelevant. - -Here is the list: + prompt = f"""Review the following list of release notes and **remove anything that is not worthy** of official release notes. Keep only changes that are **meaningful, impactful, and directly relevant to end users**, such as: +- **New features** that users will notice and interact with. +- **Significant fixes** that resolve major user-facing issues. +- **Performance improvements** that make a clear difference in speed or responsiveness. +- **Accessibility enhancements** that improve usability for a broad set of users. +- **Critical security updates** that protect users from vulnerabilities. + +**Strict Filtering Criteria - REMOVE the following:** +- **Overly technical web platform changes** (e.g., spec compliance tweaks, behind-the-scenes API adjustments). +- **Developer-facing features** that have no direct user impact. +- **Minor UI refinements** (e.g., button width adjustments, small animation tweaks). +- **Bug fixes that don’t impact most users**. +- **Obscure web compatibility changes** that apply only to edge-case websites. +- **Duplicate entries** or similar changes that were already listed. + +**Here is the list to filter:** {input_text} -Return the cleaned-up list in the same format. Only remove the list entries you do not deem worthy of being included in the release notes. KEEP THE SAME FORMAT, DO NOT ALTER THE ENTRIES THEMSELVES. Do not add any text before or after the list.""" - +**Instructions:** +- **KEEP THE SAME FORMAT** (do not change the structure of entries that remain). +- **REMOVE UNWORTHY ENTRIES ENTIRELY** (do not rewrite them—just delete). +- **DO NOT ADD ANY TEXT BEFORE OR AFTER THE LIST.** +- The output must be **only the cleaned-up list**, formatted exactly the same way. +""" try: response = client.chat.completions.create( messages=[ @@ -278,8 +300,8 @@ def generate_worthy_commits(): combined_list = "\n".join(summaries) - logger.info("Removing duplicates from the list...") - combined_list = remove_duplicates(combined_list) + # logger.info("Removing duplicates from the list...") + # combined_list = remove_duplicates(combined_list) logger.info("Removing unworthy commits from the list...") combined_list = remove_unworthy_commits(combined_list) @@ -300,82 +322,3 @@ def generate_worthy_commits(): if __name__ == "__main__": generate_worthy_commits() - - -# import openai - -# MODEL = "gpt-4o" - -# # Pre-existing conversation - -# message1 = """You are a software developer who has to change the code below by following a given Code Review. -# The Code Review is attached to the line of code starting with the line number Start_Line and -# ending with the line number End_Line. There are also characters (- and +) showing where a line -# of code in the diff hunk has been removed (marked with a - at the beginning of the line) or added -# (marked with a + at the beginning of the line). The New Code Diff should be in the correct Git diff -# format, where added lines (on top of the diff hunk) are denoted with the + character. Lines removed -# from the Diff Hunk should be denoted with the - character. Your output must not contain any trailing -# tokens/characters. Your output must adhere to the following format: "Short Explanation: [...] - -# New Code Diff: [...]" - -# Start_Line: -# 1469 - -# End_Line: -# 1470 - -# Code Review: -# {'raw': 'Please fix this warning while you are here.'} - -# Diff Hunk: - -# Code Review: -# {comment_content} - -# Diff Hunk: -# ``` -# diff -u b/xpfe/appshell/AppWindow.cpp b/xpfe/appshell/AppWindow.cpp -# --- b/xpfe/appshell/AppWindow.cpp -# +++ b/xpfe/appshell/AppWindow.cpp -# @@ -1466,8 +1466,9 @@ -# nsresult errorCode; -# int32_t zLevel = stateString.ToInteger(&errorCode); -# if (NS_SUCCEEDED(errorCode) && zLevel >= int32_t(lowestZ) && -# - zLevel <= int32_t(highestZ)) -# + zLevel <= int32_t(highestZ)) { -# SetZLevel(zLevel); -# + } -# } - -# return gotState; -# ``` -# """ - -# message2 = """('Short Explanation: The code review suggests fixing a warning related to type conversion by explicitly casting `lowestZ` and `highestZ` to `int32_t`.\n\nNew Code Diff:\n```diff\ndiff --git a/xpfe/appshell/AppWindow.cpp b/xpfe/appshell/AppWindow.cpp\n--- a/xpfe/appshell/AppWindow.cpp\n+++ b/xpfe/appshell/AppWindow.cpp\n@@ -1463,11 +1463,12 @@\n // zlevel\n windowElement->GetAttribute(ZLEVEL_ATTRIBUTE, stateString);\n if (!stateString.IsEmpty()) {\n nsresult errorCode;\n int32_t zLevel = stateString.ToInteger(&errorCode);\n- if (NS_SUCCEEDED(errorCode) && zLevel >= lowestZ && zLevel <= highestZ)\n+ if (NS_SUCCEEDED(errorCode) && zLevel >= int32_t(lowestZ) &&\n+ zLevel <= int32_t(highestZ))\n SetZLevel(zLevel);\n }\n \n return gotState;\n }\n```', '\n You are a software developer who has to change the code below by following a given Code Review.\n The Code Review is attached to the line of code starting with the line number Start_Line and\n ending with the line number End_Line. There are also characters (- and +) showing where a line\n of code in the diff hunk has been removed (marked with a - at the beginning of the line) or added\n (marked with a + at the beginning of the line). The New Code Diff should be in the correct Git diff\n format, where added lines (on top of the diff hunk) are denoted with the + character. Lines removed\n from the Diff Hunk should be denoted with the - character. Your output must not contain any trailing\n tokens/characters. Your output must adhere to the following format: "Short Explanation: [...] \n\n New Code Diff: [...]"\n\n Start_Line:\n 1469\n\n End_Line:\n 1470\n\n Code Review:\n {\'raw\': \'Please fix this warning while you are here.\'}\n\n Diff Hunk:\n ```\n diff --git a/xpfe/appshell/AppWindow.cpp b/xpfe/appshell/AppWindow.cpp\n--- a/xpfe/appshell/AppWindow.cpp\n+++ b/xpfe/appshell/AppWindow.cpp\n@@ -1463,11 +1463,12 @@\n // zlevel\n windowElement->GetAttribute(ZLEVEL_ATTRIBUTE, stateString);\n if (!stateString.IsEmpty()) {\n nsresult errorCode;\n int32_t zLevel = stateString.ToInteger(&errorCode);\n- if (NS_SUCCEEDED(errorCode) && zLevel >= lowestZ && zLevel <= highestZ)\n+ if (NS_SUCCEEDED(errorCode) && zLevel >= int32_t(lowestZ) &&\n+ zLevel <= int32_t(highestZ))\n SetZLevel(zLevel);\n }\n \n return gotState;\n }\n\n ```\n ') -# """ - -# messages = [ -# {"role": "user", "content": message1}, -# {"role": "assistant", "content": message2} -# ] - -# def chat(prompt): -# global messages - -# messages.append({"role": "user", "content": prompt}) - -# response = client.chat.completions.create( -# messages=messages, -# model=MODEL, -# temperature=0.1, -# ) -# return response.choices[0].message.content.strip() - -# # assistant_message = response["choices"][0]["message"]["content"].strip() - -# # messages.append({"role": "assistant", "content": assistant_message}) - -# # return assistant_message - -# print(chat("How did you know that this was the fix?")) From 4f0e0810e4cce8a6edfa87da4457ec90c09fc96d Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 10 Feb 2025 14:50:46 -0500 Subject: [PATCH 12/70] Removed asterisks --- scripts/release_notes_generator.py | 95 ++++++++++-------------------- 1 file changed, 31 insertions(+), 64 deletions(-) diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py index 22adf3a7cf..33bec74aec 100644 --- a/scripts/release_notes_generator.py +++ b/scripts/release_notes_generator.py @@ -67,43 +67,10 @@ def split_into_chunks(commit_log, chunk_size, model="gpt-4"): def summarize_with_gpt(input_text): - # prompt = f""" - # You are an expert in analyzing commit logs. Your task is to analyze a chunk of commit logs and produce a summary in a clear and user-friendly format. Follow these steps: - - # 1. **Analyze Commit Logs**: - # - Identify commits or groups of commits relevant for potential release notes. Focus on changes that: - # - Are meaningful to **end users**, such as new features, user-facing improvements, or critical updates. - # - Exclude: - # - Internal refactorings, test-related updates, or minor low-level changes that are not relevant to end users. - # - Highly technical details or jargon that might confuse non-developers. - - # 2. **Enhance Context**: - # - If a commit lacks sufficient information (e.g., vague descriptions or unexplained references to functions), break the process into two steps: - # - Step 1: Explain why the commit's description is insufficient for end users (e.g., the function's purpose is unclear or its relevance is ambiguous). - # - Step 2: Perform a reasoning step where you hypothesize or research the broader context, including the potential impact on security, performance, or user experience. - # - Use your analysis to enhance clarity and add relevant context to the description. This ensures that whatever you are adding to the list is actually worthy of being in the release notes, rather than you adding it with no understanding of it. - - # 3. **Output Format**: - # - Use simple, non-technical language suitable for release notes. - # - Use the following strict format for each relevant commit, in CSV FORMAT: - # [Type of Change],Description of the change,Bug XXXX,Reasoning behind the change (if necessary) - # - Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc. - - # 4. **Output Strictness**: - # - The output must only be the final list, following the specified format. - # - Ensure every description is clear, complete, and directly relevant to end users. - - # 6. **Input**: - # Here is the chunk of commit logs you need to focus on: - # {input_text} - - # 7. **Output**: - # The output should just be the list. Nothing more and nothing less. - # """ prompt = f""" You are an expert in writing Firefox release notes. Your task is to analyze a list of commits and identify important user-facing changes. Follow these steps: -1. **Must Include Only Meaningful Changes**: +1. Must Include Only Meaningful Changes: - Only keep commits that significantly impact users, such as: - New features - UI changes @@ -116,28 +83,28 @@ def summarize_with_gpt(input_text): - Test changes or documentation updates - Developer tooling or CI/CD pipeline changes -2. **Output Format**: +2. Output Format: - Use simple, non-technical language suitable for release notes. - Use the following strict format for each relevant commit, in CSV FORMAT: [Type of Change],Description of the change,Bug XXXX,Reason why the change is impactful for end users - Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc. -3. **Bad Example (DO NOT FOLLOW)**: +3. Bad Example (DO NOT FOLLOW): [Feature],Enable async FlushRendering during resizing window if Windows DirectComposition is used,Bug 1922721,Improves performance and responsiveness when resizing windows on systems using Windows DirectComposition. We should exclude this change because it contains technical jargon that is unclear to general users, making it difficult to understand. Additionally, the impact is limited to a specific subset of Windows users with DirectComposition enabled, and the improvement is not significant enough to be noteworthy in the release notes. -4. **Be Aggressive in Filtering**: - - If you're unsure whether a commit impacts end users, **EXCLUDE it**. - - Do **not** list developer-focused changes. +4. Be Aggressive in Filtering: + - If you're unsure whether a commit impacts end users, EXCLUDE it. + - Do not list developer-focused changes. -5. **Select Only the Top 5 Commits**: - - If there are more than 5 relevant commits, choose the **most impactful ones**. +5. Select Only the Top 5 Commits: + - If there are more than 5 relevant commits, choose the most impactful ones. -6. **Input**: +6. Input: Here is the chunk of commit logs you need to focus on: {input_text} -7. **Output Requirements**: +7. Output Requirements: - Output must be raw CSV text—no formatting, no extra text. - Do not wrap the output in triple backticks (` ``` `) or use markdown formatting. - Do not include the words "CSV" or any headers—just the data. @@ -187,29 +154,29 @@ def remove_duplicates(input_text): def remove_unworthy_commits(input_text): - prompt = f"""Review the following list of release notes and **remove anything that is not worthy** of official release notes. Keep only changes that are **meaningful, impactful, and directly relevant to end users**, such as: -- **New features** that users will notice and interact with. -- **Significant fixes** that resolve major user-facing issues. -- **Performance improvements** that make a clear difference in speed or responsiveness. -- **Accessibility enhancements** that improve usability for a broad set of users. -- **Critical security updates** that protect users from vulnerabilities. - -**Strict Filtering Criteria - REMOVE the following:** -- **Overly technical web platform changes** (e.g., spec compliance tweaks, behind-the-scenes API adjustments). -- **Developer-facing features** that have no direct user impact. -- **Minor UI refinements** (e.g., button width adjustments, small animation tweaks). -- **Bug fixes that don’t impact most users**. -- **Obscure web compatibility changes** that apply only to edge-case websites. -- **Duplicate entries** or similar changes that were already listed. - -**Here is the list to filter:** + prompt = f"""Review the following list of release notes and remove anything that is not worthy of official release notes. Keep only changes that are meaningful, impactful, and directly relevant to end users, such as: +- New features that users will notice and interact with. +- Significant fixes that resolve major user-facing issues. +- Performance improvements that make a clear difference in speed or responsiveness. +- Accessibility enhancements that improve usability for a broad set of users. +- Critical security updates that protect users from vulnerabilities. + +Strict Filtering Criteria - REMOVE the following: +- Overly technical web platform changes (e.g., spec compliance tweaks, behind-the-scenes API adjustments). +- Developer-facing features that have no direct user impact. +- Minor UI refinements (e.g., button width adjustments, small animation tweaks). +- Bug fixes that don’t impact most users. +- Obscure web compatibility changes that apply only to edge-case websites. +- Duplicate entries or similar changes that were already listed. + +Here is the list to filter: {input_text} -**Instructions:** -- **KEEP THE SAME FORMAT** (do not change the structure of entries that remain). -- **REMOVE UNWORTHY ENTRIES ENTIRELY** (do not rewrite them—just delete). -- **DO NOT ADD ANY TEXT BEFORE OR AFTER THE LIST.** -- The output must be **only the cleaned-up list**, formatted exactly the same way. +Instructions: +- KEEP THE SAME FORMAT (do not change the structure of entries that remain). +- REMOVE UNWORTHY ENTRIES ENTIRELY (do not rewrite them—just delete). +- DO NOT ADD ANY TEXT BEFORE OR AFTER THE LIST. +- The output must be only the cleaned-up list, formatted exactly the same way. """ try: response = client.chat.completions.create( From 821790c4eb4ad0bf0910a08c9c95fa3f3732157a Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 11 Feb 2025 15:01:20 -0500 Subject: [PATCH 13/70] Changed version --- scripts/release_notes_generator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py index 33bec74aec..f254c7cd23 100644 --- a/scripts/release_notes_generator.py +++ b/scripts/release_notes_generator.py @@ -15,8 +15,8 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -FIREFOX_VERSION_1 = "FIREFOX_BETA_132_BASE" -FIREFOX_VERSION_2 = "FIREFOX_BETA_133_BASE" +FIREFOX_VERSION_1 = "FIREFOX_BETA_135_BASE" +FIREFOX_VERSION_2 = "FIREFOX_BETA_136_BASE" REPO_DIRECTORY = "hg_dir" OUTPUT_FILE = f"version_summary_{FIREFOX_VERSION_2}.txt" CHUNK_SIZE = 10000 @@ -97,8 +97,8 @@ def summarize_with_gpt(input_text): - If you're unsure whether a commit impacts end users, EXCLUDE it. - Do not list developer-focused changes. -5. Select Only the Top 5 Commits: - - If there are more than 5 relevant commits, choose the most impactful ones. +5. Select Only the Top 10 Commits: + - If there are more than 10 relevant commits, choose the most impactful ones. 6. Input: Here is the chunk of commit logs you need to focus on: From 34e366a4f219efed0fbb89620c3a9df359041a99 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 13 Feb 2025 16:58:28 -0500 Subject: [PATCH 14/70] Added bug filtering for webextensions --- scripts/release_notes_generator.py | 48 +++++++++++++++++------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py index f254c7cd23..c3ff111092 100644 --- a/scripts/release_notes_generator.py +++ b/scripts/release_notes_generator.py @@ -6,6 +6,9 @@ import tiktoken from openai import OpenAI +from bugbug import db +from bugbug.bugzilla import BUGS_DB + MODEL = "gpt-4o" client = OpenAI( @@ -52,7 +55,6 @@ def split_into_chunks(commit_log, chunk_size, model="gpt-4"): block_token_count = get_token_count(block, model=model) if current_token_count + block_token_count > chunk_size: - print(f"number of blocks in chunk: {len(current_chunk)}") chunks.append("\n\n".join(current_chunk)) current_chunk = [] current_token_count = 0 @@ -197,15 +199,11 @@ def remove_unworthy_commits(input_text): def generate_summaries(commit_log): chunks = split_into_chunks(commit_log, CHUNK_SIZE) - print(f"LENGTH OF CHUNKS: {len(chunks)}") - print(f"LENGTH OF FIRST CHUNK: {len(chunks[0])}") summaries = [summarize_with_gpt(chunk) for chunk in chunks] - - # summaries = [summarize_with_gpt(chunks[0])] return summaries -def clean_commits(commit_log, keywords): +def clean_commits(commit_log, keywords, bug_dict): cleaned_commits = [] commit_blocks = commit_log.split("\n\n") @@ -219,6 +217,13 @@ def clean_commits(commit_log, keywords): and not re.search(r"release\+treescript@mozilla\.org", block, re.IGNORECASE) and not re.search(r"nightly", block, re.IGNORECASE) ): + bug_id = re.search(r"Bug (\d+)", block, re.IGNORECASE) + if ( + int(bug_id.group(1)) in bug_dict.keys() + and bug_dict[int(bug_id.group(1))] == "WebExtensions" + ): + continue + match = re.search(r"summary:\s+(.+)", block) commit_summary = match.group(1) if match else None cleaned_commits.append(commit_summary) @@ -226,7 +231,18 @@ def clean_commits(commit_log, keywords): return "\n\n".join(cleaned_commits) -def generate_worthy_commits(): +def load_bug_data(): + return {bug["id"]: bug["product"] for bug in db.read(BUGS_DB)} + + +def is_webextensions_bug(bug_id): + for bug in db.read(BUGS_DB): + if int(bug["id"]) == bug_id: + return bug["product"] == "WebExtensions" + return False + + +def generate_worthy_commits(bug_dict): logger.info(f"Generating list of commits for version: {FIREFOX_VERSION_2}") logger.info("Finding the branching point commit...") @@ -259,17 +275,13 @@ def generate_worthy_commits(): "add tests", "disable test", ] - cleaned_commits = clean_commits(changes_output, keywords_to_remove) - # cleaned_commits = cleaned_commits[0:40000] + cleaned_commits = clean_commits(changes_output, keywords_to_remove, bug_dict) logger.info("Generating summaries for cleaned commits...") summaries = generate_summaries(cleaned_commits) combined_list = "\n".join(summaries) - # logger.info("Removing duplicates from the list...") - # combined_list = remove_duplicates(combined_list) - logger.info("Removing unworthy commits from the list...") combined_list = remove_unworthy_commits(combined_list) @@ -277,15 +289,9 @@ def generate_worthy_commits(): file.write(combined_list) logger.info(f"Worthy commits saved to {OUTPUT_FILE}") - # with open(OUTPUT_FILE, "r", encoding="utf-8") as file: - # file_contents = file.read() - - # cleaned_commits = remove_duplicates(file_contents) - # cleaned_commits = remove_unworthy_commits(cleaned_commits) - - # with open(OUTPUT_FILE, "w", encoding="utf-8") as file: - # file.write(cleaned_commits) if __name__ == "__main__": - generate_worthy_commits() + db.download(BUGS_DB) + bug_dict = load_bug_data() + generate_worthy_commits(bug_dict) From 27204d1d363958618026c6382d210d0da4bb5154 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Wed, 19 Feb 2025 18:57:21 -0500 Subject: [PATCH 15/70] Edited prompt --- scripts/release_notes_generator.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py index c3ff111092..404e4279b0 100644 --- a/scripts/release_notes_generator.py +++ b/scripts/release_notes_generator.py @@ -73,17 +73,18 @@ def summarize_with_gpt(input_text): You are an expert in writing Firefox release notes. Your task is to analyze a list of commits and identify important user-facing changes. Follow these steps: 1. Must Include Only Meaningful Changes: - - Only keep commits that significantly impact users, such as: + - Only keep commits that significantly impact users and are strictly user-facing, such as: - New features - UI changes - Major performance improvements - - Security patches + - Security patches (if user-facing) - Web platform changes that affect how websites behave - DO NOT include: - Small bug fixes unless critical - Internal code refactoring - Test changes or documentation updates - Developer tooling or CI/CD pipeline changes +Again, only include changes that are STRICTLY USER-FACING. 2. Output Format: - Use simple, non-technical language suitable for release notes. From 2033b89b229fcf06fc08231e5190470828f825ae Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 21 Feb 2025 16:00:30 -0500 Subject: [PATCH 16/70] Separated release notes into a runner and tool, updated the method to filter out bugs behind features that are not yet shipped --- bugbug/tools/release_notes.py | 267 ++++++++++++++++++++++++++++++++ scripts/release_notes_runner.py | 36 +++++ 2 files changed, 303 insertions(+) create mode 100644 bugbug/tools/release_notes.py create mode 100644 scripts/release_notes_runner.py diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py new file mode 100644 index 0000000000..285dece505 --- /dev/null +++ b/bugbug/tools/release_notes.py @@ -0,0 +1,267 @@ +import logging +import os +import re +import subprocess + +import tiktoken +from openai import OpenAI + +from bugbug import db +from bugbug.bugzilla import BUGS_DB + +MODEL = "gpt-4o" + +client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class ReleaseNotesGenerator: + def __init__(self, repo_directory, version1, version2, chunk_size=10000): + self.repo_directory = repo_directory + self.version1 = version1 + self.version2 = version2 + self.chunk_size = chunk_size + self.output_file = f"version_summary_{self.version2}.txt" + + def run_hg_log(self, query): + try: + result = subprocess.run( + ["hg", "log", "-r", query], + cwd=self.repo_directory, + capture_output=True, + text=True, + check=True, + ) + return result.stdout.strip() + except subprocess.CalledProcessError as e: + logger.error(f"Error running hg log: {e}") + return None + + def get_token_count(self, text): + encoding = tiktoken.encoding_for_model(MODEL) + return len(encoding.encode(text)) + + def split_into_chunks(self, commit_log): + commit_blocks = commit_log.split("\n\n") + chunks = [] + current_chunk = [] + current_token_count = 0 + + for block in commit_blocks: + block_token_count = self.get_token_count(block) + + if current_token_count + block_token_count > self.chunk_size: + chunks.append("\n\n".join(current_chunk)) + current_chunk = [] + current_token_count = 0 + + current_chunk.append(block) + current_token_count += block_token_count + + if current_chunk: + chunks.append("\n\n".join(current_chunk)) + + return chunks + + def summarize_with_gpt(self, input_text): + prompt = f""" +You are an expert in writing Firefox release notes. Your task is to analyze a list of commits and identify important user-facing changes. Follow these steps: + +1. Must Include Only Meaningful Changes: + - Only keep commits that significantly impact users and are strictly user-facing, such as: + - New features + - UI changes + - Major performance improvements + - Security patches (if user-facing) + - Web platform changes that affect how websites behave + - DO NOT include: + - Small bug fixes unless critical + - Internal code refactoring + - Test changes or documentation updates + - Developer tooling or CI/CD pipeline changes +Again, only include changes that are STRICTLY USER-FACING. + +2. Output Format: + - Use simple, non-technical language suitable for release notes. + - Use the following strict format for each relevant commit, in CSV FORMAT: +[Type of Change],Description of the change,Bug XXXX,Reason why the change is impactful for end users + - Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc. + +3. Bad Example (DO NOT FOLLOW): +[Feature],Enable async FlushRendering during resizing window if Windows DirectComposition is used,Bug 1922721,Improves performance and responsiveness when resizing windows on systems using Windows DirectComposition. +We should exclude this change because it contains technical jargon that is unclear to general users, making it difficult to understand. Additionally, the impact is limited to a specific subset of Windows users with DirectComposition enabled, and the improvement is not significant enough to be noteworthy in the release notes. + +4. Be Aggressive in Filtering: + - If you're unsure whether a commit impacts end users, EXCLUDE it. + - Do not list developer-focused changes. + +5. Select Only the Top 10 Commits: + - If there are more than 10 relevant commits, choose the most impactful ones. + +6. Input: + Here is the chunk of commit logs you need to focus on: + {input_text} + +7. Output Requirements: + - Output must be raw CSV text—no formatting, no extra text. + - Do not wrap the output in triple backticks (` ``` `) or use markdown formatting. + - Do not include the words "CSV" or any headers—just the data. +""" + try: + response = client.chat.completions.create( + messages=[{"role": "user", "content": prompt}], + model=MODEL, + temperature=0.1, + ) + return response.choices[0].message.content.strip() + except Exception as e: + logger.error(f"Error calling OpenAI API: {e}") + return "Error: Unable to generate summary." + + def generate_summaries(self, commit_log): + chunks = self.split_into_chunks(commit_log) + return [self.summarize_with_gpt(chunk) for chunk in chunks] + + def clean_commits(self, commit_log, keywords, bug_dict): + cleaned_commits = [] + commit_blocks = commit_log.split("\n\n") + + for block in commit_blocks: + if ( + not any( + re.search(rf"\b{keyword}\b", block, re.IGNORECASE) + for keyword in keywords + ) + and re.search(r"Bug \d+", block, re.IGNORECASE) + and not re.search( + r"release\+treescript@mozilla\.org", block, re.IGNORECASE + ) + and not re.search(r"nightly", block, re.IGNORECASE) + ): + bug_id_match = re.search(r"Bug (\d+)", block, re.IGNORECASE) + if not bug_id_match: + continue + + bug_id = int(bug_id_match.group(1)) + if bug_id not in bug_dict: + continue + + bug_info = bug_dict[bug_id] + should_exclude = False + + if "blocks" in bug_info: + for blocked_bug_id in bug_info["blocks"]: + if blocked_bug_id in bug_dict: + blocked_bug = bug_dict[blocked_bug_id] + if "[meta]" in blocked_bug.get("summary", ""): + if ( + not blocked_bug.get("version") + or blocked_bug["version"].lower() == "unspecified" + ): + should_exclude = True + break + + if should_exclude: + continue + + match = re.search(r"summary:\s+(.+)", block) + commit_summary = match.group(1) if match else None + cleaned_commits.append(commit_summary) + + return "\n\n".join(cleaned_commits) + + def load_bug_data(self): + bug_data = list(db.read(BUGS_DB)) + return {bug["id"]: bug for bug in bug_data} + + def remove_unworthy_commits(self, input_text): + prompt = f"""Review the following list of release notes and remove anything that is not worthy of official release notes. Keep only changes that are meaningful, impactful, and directly relevant to end users, such as: +- New features that users will notice and interact with. +- Significant fixes that resolve major user-facing issues. +- Performance improvements that make a clear difference in speed or responsiveness. +- Accessibility enhancements that improve usability for a broad set of users. +- Critical security updates that protect users from vulnerabilities. + +Strict Filtering Criteria - REMOVE the following: +- Overly technical web platform changes (e.g., spec compliance tweaks, behind-the-scenes API adjustments). +- Developer-facing features that have no direct user impact. +- Minor UI refinements (e.g., button width adjustments, small animation tweaks). +- Bug fixes that don’t impact most users. +- Obscure web compatibility changes that apply only to edge-case websites. +- Duplicate entries or similar changes that were already listed. + +Here is the list to filter: +{input_text} + +Instructions: +- KEEP THE SAME FORMAT (do not change the structure of entries that remain). +- REMOVE UNWORTHY ENTRIES ENTIRELY (do not rewrite them—just delete). +- DO NOT ADD ANY TEXT BEFORE OR AFTER THE LIST. +- The output must be only the cleaned-up list, formatted exactly the same way. +""" + try: + response = client.chat.completions.create( + messages=[ + { + "role": "user", + "content": prompt, + } + ], + model=MODEL, + temperature=0.1, + ) + return response.choices[0].message.content.strip() + except Exception as e: + logger.error(f"Error while calling OpenAI API: {e}") + return "Error: Unable to remove unworthy commits." + + def generate_worthy_commits(self): + bug_dict = self.load_bug_data() + logger.info(f"Generating list of commits for version: {self.version2}") + + logger.info("Finding the branching point commit...") + branching_commit_query = f"ancestor({self.version1}, {self.version2})" + branching_commit_output = self.run_hg_log(branching_commit_query) + + if not branching_commit_output: + logger.error("Failed to find the branching point commit. Exiting.") + return + + branching_commit_hash = branching_commit_output.split(":")[1].split()[0] + logger.info(f"Branching point commit: {branching_commit_hash}") + + logger.info("Fetching the list of changes...") + changes_query = ( + f"descendants({branching_commit_hash}) and ancestors({self.version2})" + ) + changes_output = self.run_hg_log(changes_query) + + if not changes_output: + logger.error("Failed to fetch the list of changes. Exiting.") + return + + logger.info("Cleaning commit log...") + keywords_to_remove = [ + "Backed out", + "a=testonly", + "DONTBUILD", + "add tests", + "disable test", + ] + cleaned_commits = self.clean_commits( + changes_output, keywords_to_remove, bug_dict + ) + + logger.info("Generating summaries for cleaned commits...") + summaries = self.generate_summaries(cleaned_commits) + combined_list = "\n".join(summaries) + + logger.info("Removing unworthy commits from the list...") + combined_list = self.remove_unworthy_commits(combined_list) + + with open(self.output_file, "w") as file: + file.write(combined_list) + + logger.info(f"Worthy commits saved to {self.output_file}") diff --git a/scripts/release_notes_runner.py b/scripts/release_notes_runner.py new file mode 100644 index 0000000000..ff9326aa0d --- /dev/null +++ b/scripts/release_notes_runner.py @@ -0,0 +1,36 @@ +import argparse +import logging + +from bugbug import db +from bugbug.bugzilla import BUGS_DB +from bugbug.tools.release_notes import ReleaseNotesGenerator + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def main(): + parser = argparse.ArgumentParser(description="Generate Firefox release notes.") + parser.add_argument( + "--repo", default="hg_dir", help="Path to the Mercurial repository" + ) + parser.add_argument("--version1", required=True, help="Base version identifier") + parser.add_argument("--version2", required=True, help="Target version identifier") + parser.add_argument( + "--chunk-size", type=int, default=10000, help="Chunk size for token processing" + ) + + args = parser.parse_args() + + generator = ReleaseNotesGenerator( + repo_directory=args.repo, + version1=args.version1, + version2=args.version2, + chunk_size=args.chunk_size, + ) + generator.generate_worthy_commits() + + +if __name__ == "__main__": + db.download(BUGS_DB) + main() From 7638d2a6be6b1f23207fed6bdc7652c272b06e64 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 24 Feb 2025 16:09:44 -0500 Subject: [PATCH 17/70] Fixed up runner to take in only one version --- bugbug/tools/release_notes.py | 14 +++++++++++--- scripts/release_notes_runner.py | 6 ++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 285dece505..14a645114a 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -18,13 +18,21 @@ class ReleaseNotesGenerator: - def __init__(self, repo_directory, version1, version2, chunk_size=10000): + def __init__(self, repo_directory, version, chunk_size=10000): self.repo_directory = repo_directory - self.version1 = version1 - self.version2 = version2 + self.version2 = version + self.version1 = self.get_previous_version(version) self.chunk_size = chunk_size self.output_file = f"version_summary_{self.version2}.txt" + def get_previous_version(self, current_version): + match = re.match(r"(FIREFOX_BETA_)(\d+)(_BASE)", current_version) + if not match: + raise ValueError("Invalid version format") + prefix, version_number, suffix = match.groups() + previous_version_number = int(version_number) - 1 + return f"{prefix}{previous_version_number}{suffix}" + def run_hg_log(self, query): try: result = subprocess.run( diff --git a/scripts/release_notes_runner.py b/scripts/release_notes_runner.py index ff9326aa0d..ba7c6b4c22 100644 --- a/scripts/release_notes_runner.py +++ b/scripts/release_notes_runner.py @@ -14,8 +14,7 @@ def main(): parser.add_argument( "--repo", default="hg_dir", help="Path to the Mercurial repository" ) - parser.add_argument("--version1", required=True, help="Base version identifier") - parser.add_argument("--version2", required=True, help="Target version identifier") + parser.add_argument("--version", required=True, help="Target version identifier") parser.add_argument( "--chunk-size", type=int, default=10000, help="Chunk size for token processing" ) @@ -24,8 +23,7 @@ def main(): generator = ReleaseNotesGenerator( repo_directory=args.repo, - version1=args.version1, - version2=args.version2, + version=args.version, chunk_size=args.chunk_size, ) generator.generate_worthy_commits() From d9f683111699f1fa743da4a6622ccbd8065db6a1 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 25 Feb 2025 13:42:49 -0500 Subject: [PATCH 18/70] Moved version to the function --- bugbug/tools/release_notes.py | 9 +++++---- scripts/release_notes_runner.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 14a645114a..f4d55aaded 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -20,10 +20,7 @@ class ReleaseNotesGenerator: def __init__(self, repo_directory, version, chunk_size=10000): self.repo_directory = repo_directory - self.version2 = version - self.version1 = self.get_previous_version(version) self.chunk_size = chunk_size - self.output_file = f"version_summary_{self.version2}.txt" def get_previous_version(self, current_version): match = re.match(r"(FIREFOX_BETA_)(\d+)(_BASE)", current_version) @@ -225,8 +222,12 @@ def remove_unworthy_commits(self, input_text): logger.error(f"Error while calling OpenAI API: {e}") return "Error: Unable to remove unworthy commits." - def generate_worthy_commits(self): + def generate_worthy_commits(self, version): bug_dict = self.load_bug_data() + self.version2 = version + self.version1 = self.get_previous_version(version) + self.output_file = f"version_summary_{self.version2}.txt" + logger.info(f"Generating list of commits for version: {self.version2}") logger.info("Finding the branching point commit...") diff --git a/scripts/release_notes_runner.py b/scripts/release_notes_runner.py index ba7c6b4c22..9ab25c0e4d 100644 --- a/scripts/release_notes_runner.py +++ b/scripts/release_notes_runner.py @@ -26,7 +26,7 @@ def main(): version=args.version, chunk_size=args.chunk_size, ) - generator.generate_worthy_commits() + generator.generate_worthy_commits(version=args.version) if __name__ == "__main__": From 93a5982e8b6d80f770018d7e2db78f8004f0058f Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 3 Mar 2025 15:20:30 -0500 Subject: [PATCH 19/70] Fixed release notes script to make use of URL instead of local repo --- bugbug/tools/release_notes.py | 100 ++++++++++---------------------- scripts/release_notes_runner.py | 8 --- 2 files changed, 30 insertions(+), 78 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index f4d55aaded..413c6508dd 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -1,14 +1,12 @@ import logging import os import re -import subprocess +import requests import tiktoken +from bs4 import BeautifulSoup from openai import OpenAI -from bugbug import db -from bugbug.bugzilla import BUGS_DB - MODEL = "gpt-4o" client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) @@ -18,8 +16,7 @@ class ReleaseNotesGenerator: - def __init__(self, repo_directory, version, chunk_size=10000): - self.repo_directory = repo_directory + def __init__(self, chunk_size=10000): self.chunk_size = chunk_size def get_previous_version(self, current_version): @@ -30,20 +27,6 @@ def get_previous_version(self, current_version): previous_version_number = int(version_number) - 1 return f"{prefix}{previous_version_number}{suffix}" - def run_hg_log(self, query): - try: - result = subprocess.run( - ["hg", "log", "-r", query], - cwd=self.repo_directory, - capture_output=True, - text=True, - check=True, - ) - return result.stdout.strip() - except subprocess.CalledProcessError as e: - logger.error(f"Error running hg log: {e}") - return None - def get_token_count(self, text): encoding = tiktoken.encoding_for_model(MODEL) return len(encoding.encode(text)) @@ -129,9 +112,9 @@ def generate_summaries(self, commit_log): chunks = self.split_into_chunks(commit_log) return [self.summarize_with_gpt(chunk) for chunk in chunks] - def clean_commits(self, commit_log, keywords, bug_dict): + def clean_commits(self, commit_log, keywords): cleaned_commits = [] - commit_blocks = commit_log.split("\n\n") + commit_blocks = commit_log.split("\n") for block in commit_blocks: if ( @@ -149,38 +132,15 @@ def clean_commits(self, commit_log, keywords, bug_dict): if not bug_id_match: continue - bug_id = int(bug_id_match.group(1)) - if bug_id not in bug_dict: - continue - - bug_info = bug_dict[bug_id] - should_exclude = False - - if "blocks" in bug_info: - for blocked_bug_id in bug_info["blocks"]: - if blocked_bug_id in bug_dict: - blocked_bug = bug_dict[blocked_bug_id] - if "[meta]" in blocked_bug.get("summary", ""): - if ( - not blocked_bug.get("version") - or blocked_bug["version"].lower() == "unspecified" - ): - should_exclude = True - break - - if should_exclude: - continue + bug_position = re.search(r"Bug \d+.*", block, re.IGNORECASE) + if bug_position: + block = bug_position.group(0) - match = re.search(r"summary:\s+(.+)", block) - commit_summary = match.group(1) if match else None + commit_summary = block cleaned_commits.append(commit_summary) return "\n\n".join(cleaned_commits) - def load_bug_data(self): - bug_data = list(db.read(BUGS_DB)) - return {bug["id"]: bug for bug in bug_data} - def remove_unworthy_commits(self, input_text): prompt = f"""Review the following list of release notes and remove anything that is not worthy of official release notes. Keep only changes that are meaningful, impactful, and directly relevant to end users, such as: - New features that users will notice and interact with. @@ -223,32 +183,34 @@ def remove_unworthy_commits(self, input_text): return "Error: Unable to remove unworthy commits." def generate_worthy_commits(self, version): - bug_dict = self.load_bug_data() self.version2 = version self.version1 = self.get_previous_version(version) self.output_file = f"version_summary_{self.version2}.txt" logger.info(f"Generating list of commits for version: {self.version2}") - - logger.info("Finding the branching point commit...") - branching_commit_query = f"ancestor({self.version1}, {self.version2})" - branching_commit_output = self.run_hg_log(branching_commit_query) - - if not branching_commit_output: - logger.error("Failed to find the branching point commit. Exiting.") + url = f"https://hg.mozilla.org/releases/mozilla-release/pushloghtml?fromchange={self.version1}&tochange={self.version2}" + response = requests.get(url) + changes_output = "" + + if response.status_code == 200: + soup = BeautifulSoup(response.text, "html.parser") + commit_entries = soup.find_all("tr", class_="pushlogentry") + commits = [ + ( + entry.find_all("td")[1].text.strip(), + entry.find_all("td")[2].get_text(separator=" ", strip=True), + ) + for entry in commit_entries + ] + changes_output = "\n".join(commit[1] for commit in commits) + else: + logger.error( + f"Failed to retrieve the webpage. Status code: {response.status_code}" + ) return - branching_commit_hash = branching_commit_output.split(":")[1].split()[0] - logger.info(f"Branching point commit: {branching_commit_hash}") - - logger.info("Fetching the list of changes...") - changes_query = ( - f"descendants({branching_commit_hash}) and ancestors({self.version2})" - ) - changes_output = self.run_hg_log(changes_query) - if not changes_output: - logger.error("Failed to fetch the list of changes. Exiting.") + logger.error("No changes found.") return logger.info("Cleaning commit log...") @@ -259,9 +221,7 @@ def generate_worthy_commits(self, version): "add tests", "disable test", ] - cleaned_commits = self.clean_commits( - changes_output, keywords_to_remove, bug_dict - ) + cleaned_commits = self.clean_commits(changes_output, keywords_to_remove) logger.info("Generating summaries for cleaned commits...") summaries = self.generate_summaries(cleaned_commits) diff --git a/scripts/release_notes_runner.py b/scripts/release_notes_runner.py index 9ab25c0e4d..f6de2c88da 100644 --- a/scripts/release_notes_runner.py +++ b/scripts/release_notes_runner.py @@ -1,8 +1,6 @@ import argparse import logging -from bugbug import db -from bugbug.bugzilla import BUGS_DB from bugbug.tools.release_notes import ReleaseNotesGenerator logging.basicConfig(level=logging.INFO) @@ -11,9 +9,6 @@ def main(): parser = argparse.ArgumentParser(description="Generate Firefox release notes.") - parser.add_argument( - "--repo", default="hg_dir", help="Path to the Mercurial repository" - ) parser.add_argument("--version", required=True, help="Target version identifier") parser.add_argument( "--chunk-size", type=int, default=10000, help="Chunk size for token processing" @@ -22,13 +17,10 @@ def main(): args = parser.parse_args() generator = ReleaseNotesGenerator( - repo_directory=args.repo, - version=args.version, chunk_size=args.chunk_size, ) generator.generate_worthy_commits(version=args.version) if __name__ == "__main__": - db.download(BUGS_DB) main() From 2b702fe32f06a5ab302b8f7c6c076644879cd265 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 3 Mar 2025 15:59:09 -0500 Subject: [PATCH 20/70] Removed old script --- scripts/release_notes_generator.py | 298 ----------------------------- 1 file changed, 298 deletions(-) delete mode 100644 scripts/release_notes_generator.py diff --git a/scripts/release_notes_generator.py b/scripts/release_notes_generator.py deleted file mode 100644 index 404e4279b0..0000000000 --- a/scripts/release_notes_generator.py +++ /dev/null @@ -1,298 +0,0 @@ -import logging -import os -import re -import subprocess - -import tiktoken -from openai import OpenAI - -from bugbug import db -from bugbug.bugzilla import BUGS_DB - -MODEL = "gpt-4o" - -client = OpenAI( - api_key=os.environ.get("OPENAI_API_KEY"), -) - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -FIREFOX_VERSION_1 = "FIREFOX_BETA_135_BASE" -FIREFOX_VERSION_2 = "FIREFOX_BETA_136_BASE" -REPO_DIRECTORY = "hg_dir" -OUTPUT_FILE = f"version_summary_{FIREFOX_VERSION_2}.txt" -CHUNK_SIZE = 10000 - - -def run_hg_log(query, repo_dir): - try: - result = subprocess.run( - ["hg", "log", "-r", query], - cwd=repo_dir, - capture_output=True, - text=True, - check=True, - ) - return result.stdout.strip() - except subprocess.CalledProcessError as e: - print(f"Error running hg log: {e}") - return None - - -def get_token_count(text, model=MODEL): - encoding = tiktoken.encoding_for_model(model) - return len(encoding.encode(text)) - - -def split_into_chunks(commit_log, chunk_size, model="gpt-4"): - commit_blocks = commit_log.split("\n\n") - chunks = [] - current_chunk = [] - current_token_count = 0 - - for block in commit_blocks: - block_token_count = get_token_count(block, model=model) - - if current_token_count + block_token_count > chunk_size: - chunks.append("\n\n".join(current_chunk)) - current_chunk = [] - current_token_count = 0 - - current_chunk.append(block) - current_token_count += block_token_count - - if current_chunk: - chunks.append("\n\n".join(current_chunk)) - - return chunks - - -def summarize_with_gpt(input_text): - prompt = f""" -You are an expert in writing Firefox release notes. Your task is to analyze a list of commits and identify important user-facing changes. Follow these steps: - -1. Must Include Only Meaningful Changes: - - Only keep commits that significantly impact users and are strictly user-facing, such as: - - New features - - UI changes - - Major performance improvements - - Security patches (if user-facing) - - Web platform changes that affect how websites behave - - DO NOT include: - - Small bug fixes unless critical - - Internal code refactoring - - Test changes or documentation updates - - Developer tooling or CI/CD pipeline changes -Again, only include changes that are STRICTLY USER-FACING. - -2. Output Format: - - Use simple, non-technical language suitable for release notes. - - Use the following strict format for each relevant commit, in CSV FORMAT: -[Type of Change],Description of the change,Bug XXXX,Reason why the change is impactful for end users - - Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc. - -3. Bad Example (DO NOT FOLLOW): -[Feature],Enable async FlushRendering during resizing window if Windows DirectComposition is used,Bug 1922721,Improves performance and responsiveness when resizing windows on systems using Windows DirectComposition. -We should exclude this change because it contains technical jargon that is unclear to general users, making it difficult to understand. Additionally, the impact is limited to a specific subset of Windows users with DirectComposition enabled, and the improvement is not significant enough to be noteworthy in the release notes. - -4. Be Aggressive in Filtering: - - If you're unsure whether a commit impacts end users, EXCLUDE it. - - Do not list developer-focused changes. - -5. Select Only the Top 10 Commits: - - If there are more than 10 relevant commits, choose the most impactful ones. - -6. Input: - Here is the chunk of commit logs you need to focus on: - {input_text} - -7. Output Requirements: - - Output must be raw CSV text—no formatting, no extra text. - - Do not wrap the output in triple backticks (` ``` `) or use markdown formatting. - - Do not include the words "CSV" or any headers—just the data. -""" - - try: - response = client.chat.completions.create( - messages=[ - { - "role": "user", - "content": prompt, - } - ], - model=MODEL, - temperature=0.1, - ) - return response.choices[0].message.content.strip() - except Exception as e: - logger.error(f"Error while calling OpenAI API: {e}") - return "Error: Unable to generate summary." - - -def remove_duplicates(input_text): - prompt = f"""Given the following list, remove any duplicate entries. That is, if two or more entries talk abou the same change (does not have to be identical wording), remove the less descriptive one. Do not alter anything else. - - Here is the list: - {input_text} - - The output should just be the list with the duplicates removed. Nothing more, nothing less. Do not add any text before or after the list. - """ - - try: - response = client.chat.completions.create( - messages=[ - { - "role": "user", - "content": prompt, - } - ], - model=MODEL, - temperature=0.1, - ) - return response.choices[0].message.content.strip() - except Exception as e: - logger.error(f"Error while calling OpenAI API: {e}") - return "Error: Unable to remove duplicates." - - -def remove_unworthy_commits(input_text): - prompt = f"""Review the following list of release notes and remove anything that is not worthy of official release notes. Keep only changes that are meaningful, impactful, and directly relevant to end users, such as: -- New features that users will notice and interact with. -- Significant fixes that resolve major user-facing issues. -- Performance improvements that make a clear difference in speed or responsiveness. -- Accessibility enhancements that improve usability for a broad set of users. -- Critical security updates that protect users from vulnerabilities. - -Strict Filtering Criteria - REMOVE the following: -- Overly technical web platform changes (e.g., spec compliance tweaks, behind-the-scenes API adjustments). -- Developer-facing features that have no direct user impact. -- Minor UI refinements (e.g., button width adjustments, small animation tweaks). -- Bug fixes that don’t impact most users. -- Obscure web compatibility changes that apply only to edge-case websites. -- Duplicate entries or similar changes that were already listed. - -Here is the list to filter: -{input_text} - -Instructions: -- KEEP THE SAME FORMAT (do not change the structure of entries that remain). -- REMOVE UNWORTHY ENTRIES ENTIRELY (do not rewrite them—just delete). -- DO NOT ADD ANY TEXT BEFORE OR AFTER THE LIST. -- The output must be only the cleaned-up list, formatted exactly the same way. -""" - try: - response = client.chat.completions.create( - messages=[ - { - "role": "user", - "content": prompt, - } - ], - model=MODEL, - temperature=0.1, - ) - return response.choices[0].message.content.strip() - except Exception as e: - logger.error(f"Error while calling OpenAI API: {e}") - return "Error: Unable to remove unworthy commits." - - -def generate_summaries(commit_log): - chunks = split_into_chunks(commit_log, CHUNK_SIZE) - summaries = [summarize_with_gpt(chunk) for chunk in chunks] - return summaries - - -def clean_commits(commit_log, keywords, bug_dict): - cleaned_commits = [] - commit_blocks = commit_log.split("\n\n") - - for block in commit_blocks: - if ( - not any( - re.search(rf"\b{keyword}\b", block, re.IGNORECASE) - for keyword in keywords - ) - and re.search(r"Bug \d+", block, re.IGNORECASE) - and not re.search(r"release\+treescript@mozilla\.org", block, re.IGNORECASE) - and not re.search(r"nightly", block, re.IGNORECASE) - ): - bug_id = re.search(r"Bug (\d+)", block, re.IGNORECASE) - if ( - int(bug_id.group(1)) in bug_dict.keys() - and bug_dict[int(bug_id.group(1))] == "WebExtensions" - ): - continue - - match = re.search(r"summary:\s+(.+)", block) - commit_summary = match.group(1) if match else None - cleaned_commits.append(commit_summary) - - return "\n\n".join(cleaned_commits) - - -def load_bug_data(): - return {bug["id"]: bug["product"] for bug in db.read(BUGS_DB)} - - -def is_webextensions_bug(bug_id): - for bug in db.read(BUGS_DB): - if int(bug["id"]) == bug_id: - return bug["product"] == "WebExtensions" - return False - - -def generate_worthy_commits(bug_dict): - logger.info(f"Generating list of commits for version: {FIREFOX_VERSION_2}") - - logger.info("Finding the branching point commit...") - branching_commit_query = f"ancestor({FIREFOX_VERSION_1}, {FIREFOX_VERSION_2})" - branching_commit_output = run_hg_log(branching_commit_query, REPO_DIRECTORY) - - if not branching_commit_output: - logger.error("Failed to find the branching point commit. Exiting.") - exit(1) - - branching_commit_hash = branching_commit_output.split(":")[1].split()[0] - logger.info(f"Branching point commit: {branching_commit_hash}") - - logger.info("Fetching the list of changes...") - changes_query = ( - f"descendants({branching_commit_hash}) and ancestors({FIREFOX_VERSION_2})" - ) - changes_output = run_hg_log(changes_query, REPO_DIRECTORY) - - if not changes_output: - logger.error("Failed to fetch the list of changes. Exiting.") - exit(1) - - logger.info("Cleaning commit log...") - keywords_to_remove = [ - "Backed out", - "a=testonly", - "a=release", - "DONTBUILD", - "add tests", - "disable test", - ] - cleaned_commits = clean_commits(changes_output, keywords_to_remove, bug_dict) - - logger.info("Generating summaries for cleaned commits...") - summaries = generate_summaries(cleaned_commits) - - combined_list = "\n".join(summaries) - - logger.info("Removing unworthy commits from the list...") - combined_list = remove_unworthy_commits(combined_list) - - with open(OUTPUT_FILE, "w") as file: - file.write(combined_list) - - logger.info(f"Worthy commits saved to {OUTPUT_FILE}") - - -if __name__ == "__main__": - db.download(BUGS_DB) - bug_dict = load_bug_data() - generate_worthy_commits(bug_dict) From 35ad0734172696002cfc571b2d7d58cb5c78cafe Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 3 Mar 2025 22:03:48 -0500 Subject: [PATCH 21/70] Removed HTML parsing with json --- bugbug/tools/release_notes.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 413c6508dd..8c77691af8 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -4,7 +4,6 @@ import requests import tiktoken -from bs4 import BeautifulSoup from openai import OpenAI MODEL = "gpt-4o" @@ -188,21 +187,23 @@ def generate_worthy_commits(self, version): self.output_file = f"version_summary_{self.version2}.txt" logger.info(f"Generating list of commits for version: {self.version2}") - url = f"https://hg.mozilla.org/releases/mozilla-release/pushloghtml?fromchange={self.version1}&tochange={self.version2}" + url = f"https://hg.mozilla.org/releases/mozilla-release/json-pushes?fromchange={self.version1}&tochange={self.version2}&full=1" response = requests.get(url) changes_output = "" - if response.status_code == 200: - soup = BeautifulSoup(response.text, "html.parser") - commit_entries = soup.find_all("tr", class_="pushlogentry") - commits = [ - ( - entry.find_all("td")[1].text.strip(), - entry.find_all("td")[2].get_text(separator=" ", strip=True), - ) - for entry in commit_entries - ] - changes_output = "\n".join(commit[1] for commit in commits) + data = response.json() + commit_descriptions = [] + + for push_id in data: + push_data = data[push_id] + changesets = push_data.get("changesets", []) + + for changeset in changesets: + desc = changeset.get("desc", "").strip() + if desc: + commit_descriptions.append(desc) + + changes_output = "\n".join(commit_descriptions) else: logger.error( f"Failed to retrieve the webpage. Status code: {response.status_code}" @@ -222,6 +223,7 @@ def generate_worthy_commits(self, version): "disable test", ] cleaned_commits = self.clean_commits(changes_output, keywords_to_remove) + print(f"CLEANED COMMITS: {cleaned_commits}") logger.info("Generating summaries for cleaned commits...") summaries = self.generate_summaries(cleaned_commits) From 2d0030cc48cd522b6241c6beee221642e3e57794 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 4 Mar 2025 22:50:05 -0500 Subject: [PATCH 22/70] Removed .get and response 200 --- bugbug/tools/release_notes.py | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 8c77691af8..9821154184 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -190,25 +190,21 @@ def generate_worthy_commits(self, version): url = f"https://hg.mozilla.org/releases/mozilla-release/json-pushes?fromchange={self.version1}&tochange={self.version2}&full=1" response = requests.get(url) changes_output = "" - if response.status_code == 200: - data = response.json() - commit_descriptions = [] - - for push_id in data: - push_data = data[push_id] - changesets = push_data.get("changesets", []) - - for changeset in changesets: - desc = changeset.get("desc", "").strip() - if desc: - commit_descriptions.append(desc) - - changes_output = "\n".join(commit_descriptions) - else: - logger.error( - f"Failed to retrieve the webpage. Status code: {response.status_code}" - ) - return + response.raise_for_status() + + data = response.json() + commit_descriptions = [] + + for push_id in data: + push_data = data[push_id] + changesets = push_data["changesets"] + + for changeset in changesets: + desc = changeset["desc"].strip() + if desc: + commit_descriptions.append(desc) + + changes_output = "\n".join(commit_descriptions) if not changes_output: logger.error("No changes found.") @@ -223,7 +219,6 @@ def generate_worthy_commits(self, version): "disable test", ] cleaned_commits = self.clean_commits(changes_output, keywords_to_remove) - print(f"CLEANED COMMITS: {cleaned_commits}") logger.info("Generating summaries for cleaned commits...") summaries = self.generate_summaries(cleaned_commits) From fbb3c30c06a8c44d60fc62d68a3d534a28f38bfa Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 4 Mar 2025 23:17:33 -0500 Subject: [PATCH 23/70] Made input and output list instead of string --- bugbug/tools/release_notes.py | 58 +++++++++++++++-------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 9821154184..d0b79169b9 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -31,7 +31,7 @@ def get_token_count(self, text): return len(encoding.encode(text)) def split_into_chunks(self, commit_log): - commit_blocks = commit_log.split("\n\n") + commit_blocks = commit_log.split("\n") chunks = [] current_chunk = [] current_token_count = 0 @@ -96,26 +96,22 @@ def summarize_with_gpt(self, input_text): - Do not wrap the output in triple backticks (` ``` `) or use markdown formatting. - Do not include the words "CSV" or any headers—just the data. """ - try: - response = client.chat.completions.create( - messages=[{"role": "user", "content": prompt}], - model=MODEL, - temperature=0.1, - ) - return response.choices[0].message.content.strip() - except Exception as e: - logger.error(f"Error calling OpenAI API: {e}") - return "Error: Unable to generate summary." - - def generate_summaries(self, commit_log): - chunks = self.split_into_chunks(commit_log) + response = client.chat.completions.create( + messages=[{"role": "user", "content": prompt}], + model=MODEL, + temperature=0.1, + ) + return response.choices[0].message.content.strip() + + def generate_summaries(self, commit_log_list): + commit_log_list_combined = "\n".join(commit_log_list) + chunks = self.split_into_chunks(commit_log_list_combined) return [self.summarize_with_gpt(chunk) for chunk in chunks] - def clean_commits(self, commit_log, keywords): + def clean_commits(self, commit_log_list, keywords): cleaned_commits = [] - commit_blocks = commit_log.split("\n") - for block in commit_blocks: + for block in commit_log_list: if ( not any( re.search(rf"\b{keyword}\b", block, re.IGNORECASE) @@ -138,9 +134,10 @@ def clean_commits(self, commit_log, keywords): commit_summary = block cleaned_commits.append(commit_summary) - return "\n\n".join(cleaned_commits) + return cleaned_commits - def remove_unworthy_commits(self, input_text): + def remove_unworthy_commits(self, summaries): + combined_list = "\n".join(summaries) prompt = f"""Review the following list of release notes and remove anything that is not worthy of official release notes. Keep only changes that are meaningful, impactful, and directly relevant to end users, such as: - New features that users will notice and interact with. - Significant fixes that resolve major user-facing issues. @@ -157,7 +154,7 @@ def remove_unworthy_commits(self, input_text): - Duplicate entries or similar changes that were already listed. Here is the list to filter: -{input_text} +{combined_list} Instructions: - KEEP THE SAME FORMAT (do not change the structure of entries that remain). @@ -189,11 +186,10 @@ def generate_worthy_commits(self, version): logger.info(f"Generating list of commits for version: {self.version2}") url = f"https://hg.mozilla.org/releases/mozilla-release/json-pushes?fromchange={self.version1}&tochange={self.version2}&full=1" response = requests.get(url) - changes_output = "" response.raise_for_status() data = response.json() - commit_descriptions = [] + commit_log_list = [] for push_id in data: push_data = data[push_id] @@ -202,11 +198,9 @@ def generate_worthy_commits(self, version): for changeset in changesets: desc = changeset["desc"].strip() if desc: - commit_descriptions.append(desc) + commit_log_list.append(desc) - changes_output = "\n".join(commit_descriptions) - - if not changes_output: + if not commit_log_list: logger.error("No changes found.") return @@ -218,16 +212,12 @@ def generate_worthy_commits(self, version): "add tests", "disable test", ] - cleaned_commits = self.clean_commits(changes_output, keywords_to_remove) + cleaned_commits = self.clean_commits(commit_log_list, keywords_to_remove) logger.info("Generating summaries for cleaned commits...") - summaries = self.generate_summaries(cleaned_commits) - combined_list = "\n".join(summaries) + summaries_list = self.generate_summaries(cleaned_commits) logger.info("Removing unworthy commits from the list...") - combined_list = self.remove_unworthy_commits(combined_list) - - with open(self.output_file, "w") as file: - file.write(combined_list) + combined_list = self.remove_unworthy_commits(summaries_list) - logger.info(f"Worthy commits saved to {self.output_file}") + print(combined_list) From 3d6f4d00d9730ba96ec34f043c8a124213f817c1 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Wed, 5 Mar 2025 10:13:03 -0500 Subject: [PATCH 24/70] Using LangChain --- bugbug/tools/release_notes.py | 39 ++++++++++++--------------------- scripts/release_notes_runner.py | 7 +++--- 2 files changed, 18 insertions(+), 28 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index d0b79169b9..08b069f9b6 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -4,10 +4,10 @@ import requests import tiktoken +from langchain.schema import HumanMessage +from langchain_openai import ChatOpenAI from openai import OpenAI -MODEL = "gpt-4o" - client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) logging.basicConfig(level=logging.INFO) @@ -15,8 +15,14 @@ class ReleaseNotesGenerator: - def __init__(self, chunk_size=10000): + def __init__(self, chunk_size=10000, model="gpt-4o"): + self.model = model self.chunk_size = chunk_size + self.llm = ChatOpenAI( + model=model, + temperature=0.1, + openai_api_key=os.environ.get("OPENAI_API_KEY"), + ) def get_previous_version(self, current_version): match = re.match(r"(FIREFOX_BETA_)(\d+)(_BASE)", current_version) @@ -27,7 +33,7 @@ def get_previous_version(self, current_version): return f"{prefix}{previous_version_number}{suffix}" def get_token_count(self, text): - encoding = tiktoken.encoding_for_model(MODEL) + encoding = tiktoken.encoding_for_model(self.model) return len(encoding.encode(text)) def split_into_chunks(self, commit_log): @@ -96,12 +102,8 @@ def summarize_with_gpt(self, input_text): - Do not wrap the output in triple backticks (` ``` `) or use markdown formatting. - Do not include the words "CSV" or any headers—just the data. """ - response = client.chat.completions.create( - messages=[{"role": "user", "content": prompt}], - model=MODEL, - temperature=0.1, - ) - return response.choices[0].message.content.strip() + response = self.llm.invoke([HumanMessage(content=prompt)]) + return response.content.strip() def generate_summaries(self, commit_log_list): commit_log_list_combined = "\n".join(commit_log_list) @@ -162,21 +164,8 @@ def remove_unworthy_commits(self, summaries): - DO NOT ADD ANY TEXT BEFORE OR AFTER THE LIST. - The output must be only the cleaned-up list, formatted exactly the same way. """ - try: - response = client.chat.completions.create( - messages=[ - { - "role": "user", - "content": prompt, - } - ], - model=MODEL, - temperature=0.1, - ) - return response.choices[0].message.content.strip() - except Exception as e: - logger.error(f"Error while calling OpenAI API: {e}") - return "Error: Unable to remove unworthy commits." + response = self.llm.invoke([HumanMessage(content=prompt)]) + return response.content.strip() def generate_worthy_commits(self, version): self.version2 = version diff --git a/scripts/release_notes_runner.py b/scripts/release_notes_runner.py index f6de2c88da..db5e1b040a 100644 --- a/scripts/release_notes_runner.py +++ b/scripts/release_notes_runner.py @@ -13,12 +13,13 @@ def main(): parser.add_argument( "--chunk-size", type=int, default=10000, help="Chunk size for token processing" ) + parser.add_argument( + "--model", default="gpt-4o", help="Model to use for summarization" + ) args = parser.parse_args() - generator = ReleaseNotesGenerator( - chunk_size=args.chunk_size, - ) + generator = ReleaseNotesGenerator(chunk_size=args.chunk_size, model=args.model) generator.generate_worthy_commits(version=args.version) From c220bb3140d99d19d8daacd40ac7fa7dc4ae2d8f Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Wed, 5 Mar 2025 10:18:53 -0500 Subject: [PATCH 25/70] Using data.values() --- bugbug/tools/release_notes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 08b069f9b6..1d03cbf1e8 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -180,8 +180,7 @@ def generate_worthy_commits(self, version): data = response.json() commit_log_list = [] - for push_id in data: - push_data = data[push_id] + for push_data in data.values(): changesets = push_data["changesets"] for changeset in changesets: From 1bf92b2f39807d2cbc5e86ba53bd79f034692ec2 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Wed, 5 Mar 2025 15:42:25 -0500 Subject: [PATCH 26/70] Added LLMChain --- bugbug/tools/release_notes.py | 172 +++++++++++++++++--------------- scripts/release_notes_runner.py | 20 +++- 2 files changed, 108 insertions(+), 84 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 1d03cbf1e8..f0b12a0c2a 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -4,8 +4,8 @@ import requests import tiktoken -from langchain.schema import HumanMessage -from langchain_openai import ChatOpenAI +from langchain.chains import LLMChain +from langchain.prompts import PromptTemplate from openai import OpenAI client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) @@ -15,13 +15,87 @@ class ReleaseNotesGenerator: - def __init__(self, chunk_size=10000, model="gpt-4o"): - self.model = model + def __init__(self, chunk_size, llm): self.chunk_size = chunk_size - self.llm = ChatOpenAI( - model=model, - temperature=0.1, - openai_api_key=os.environ.get("OPENAI_API_KEY"), + self.llm = llm + self.summarization_prompt = PromptTemplate( + input_variables=["input_text"], + template="""You are an expert in writing Firefox release notes. Your task is to analyze a list of commits and identify important user-facing changes. Follow these steps: + +1. Must Include Only Meaningful Changes: + - Only keep commits that significantly impact users and are strictly user-facing, such as: + - New features + - UI changes + - Major performance improvements + - Security patches (if user-facing) + - Web platform changes that affect how websites behave + - DO NOT include: + - Small bug fixes unless critical + - Internal code refactoring + - Test changes or documentation updates + - Developer tooling or CI/CD pipeline changes +Again, only include changes that are STRICTLY USER-FACING. + +2. Output Format: + - Use simple, non-technical language suitable for release notes. + - Use the following strict format for each relevant commit, in CSV FORMAT: +[Type of Change],Description of the change,Bug XXXX,Reason why the change is impactful for end users + - Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc. + +3. Be Aggressive in Filtering: + - If you're unsure whether a commit impacts end users, EXCLUDE it. + - Do not list developer-focused changes. + +4. Select Only the Top 10 Commits: + - If there are more than 10 relevant commits, choose the most impactful ones. + +5. Input: + Here is the chunk of commit logs you need to focus on: + {input_text} + +6. Output Requirements: + - Output must be raw CSV text—no formatting, no extra text. + - Do not wrap the output in triple backticks (` ``` `) or use markdown formatting. + - Do not include the words "CSV" or any headers—just the data. +""", + ) + + self.summarization_chain = LLMChain( + llm=self.llm, + prompt=self.summarization_prompt, + ) + + self.cleanup_prompt = PromptTemplate( + input_variables=["combined_list"], + template="""Review the following list of release notes and remove anything that is not worthy of official release notes. Keep only changes that are meaningful, impactful, and directly relevant to end users, such as: +- New features that users will notice and interact with. +- Significant fixes that resolve major user-facing issues. +- Performance improvements that make a clear difference in speed or responsiveness. +- Accessibility enhancements that improve usability for a broad set of users. +- Critical security updates that protect users from vulnerabilities. + +Strict Filtering Criteria - REMOVE the following: +- Overly technical web platform changes (e.g., spec compliance tweaks, behind-the-scenes API adjustments). +- Developer-facing features that have no direct user impact. +- Minor UI refinements (e.g., button width adjustments, small animation tweaks). +- Bug fixes that don’t impact most users. +- Obscure web compatibility changes that apply only to edge-case websites. +- Duplicate entries or similar changes that were already listed. + +Here is the list to filter: +{combined_list} + +Instructions: +- KEEP THE SAME FORMAT (do not change the structure of entries that remain). +- REMOVE UNWORTHY ENTRIES ENTIRELY (do not rewrite them—just delete). +- DO NOT ADD ANY TEXT BEFORE OR AFTER THE LIST. +- The output must be only the cleaned-up list, formatted exactly the same way. +""", + ) + + self.cleanup_chain = LLMChain( + llm=self.llm, + prompt=self.cleanup_prompt, ) def get_previous_version(self, current_version): @@ -33,7 +107,12 @@ def get_previous_version(self, current_version): return f"{prefix}{previous_version_number}{suffix}" def get_token_count(self, text): - encoding = tiktoken.encoding_for_model(self.model) + if hasattr(self.llm, "model_name"): + model_name = self.llm.model_name + else: + raise ValueError("LLM model name not found.") + + encoding = tiktoken.encoding_for_model(model_name) return len(encoding.encode(text)) def split_into_chunks(self, commit_log): @@ -59,51 +138,7 @@ def split_into_chunks(self, commit_log): return chunks def summarize_with_gpt(self, input_text): - prompt = f""" -You are an expert in writing Firefox release notes. Your task is to analyze a list of commits and identify important user-facing changes. Follow these steps: - -1. Must Include Only Meaningful Changes: - - Only keep commits that significantly impact users and are strictly user-facing, such as: - - New features - - UI changes - - Major performance improvements - - Security patches (if user-facing) - - Web platform changes that affect how websites behave - - DO NOT include: - - Small bug fixes unless critical - - Internal code refactoring - - Test changes or documentation updates - - Developer tooling or CI/CD pipeline changes -Again, only include changes that are STRICTLY USER-FACING. - -2. Output Format: - - Use simple, non-technical language suitable for release notes. - - Use the following strict format for each relevant commit, in CSV FORMAT: -[Type of Change],Description of the change,Bug XXXX,Reason why the change is impactful for end users - - Possible types of change: [Feature], [Fix], [Performance], [Security], [UI], [DevTools], [Web Platform], etc. - -3. Bad Example (DO NOT FOLLOW): -[Feature],Enable async FlushRendering during resizing window if Windows DirectComposition is used,Bug 1922721,Improves performance and responsiveness when resizing windows on systems using Windows DirectComposition. -We should exclude this change because it contains technical jargon that is unclear to general users, making it difficult to understand. Additionally, the impact is limited to a specific subset of Windows users with DirectComposition enabled, and the improvement is not significant enough to be noteworthy in the release notes. - -4. Be Aggressive in Filtering: - - If you're unsure whether a commit impacts end users, EXCLUDE it. - - Do not list developer-focused changes. - -5. Select Only the Top 10 Commits: - - If there are more than 10 relevant commits, choose the most impactful ones. - -6. Input: - Here is the chunk of commit logs you need to focus on: - {input_text} - -7. Output Requirements: - - Output must be raw CSV text—no formatting, no extra text. - - Do not wrap the output in triple backticks (` ``` `) or use markdown formatting. - - Do not include the words "CSV" or any headers—just the data. -""" - response = self.llm.invoke([HumanMessage(content=prompt)]) - return response.content.strip() + return self.summarization_chain.run({"input_text": input_text}).strip() def generate_summaries(self, commit_log_list): commit_log_list_combined = "\n".join(commit_log_list) @@ -140,32 +175,7 @@ def clean_commits(self, commit_log_list, keywords): def remove_unworthy_commits(self, summaries): combined_list = "\n".join(summaries) - prompt = f"""Review the following list of release notes and remove anything that is not worthy of official release notes. Keep only changes that are meaningful, impactful, and directly relevant to end users, such as: -- New features that users will notice and interact with. -- Significant fixes that resolve major user-facing issues. -- Performance improvements that make a clear difference in speed or responsiveness. -- Accessibility enhancements that improve usability for a broad set of users. -- Critical security updates that protect users from vulnerabilities. - -Strict Filtering Criteria - REMOVE the following: -- Overly technical web platform changes (e.g., spec compliance tweaks, behind-the-scenes API adjustments). -- Developer-facing features that have no direct user impact. -- Minor UI refinements (e.g., button width adjustments, small animation tweaks). -- Bug fixes that don’t impact most users. -- Obscure web compatibility changes that apply only to edge-case websites. -- Duplicate entries or similar changes that were already listed. - -Here is the list to filter: -{combined_list} - -Instructions: -- KEEP THE SAME FORMAT (do not change the structure of entries that remain). -- REMOVE UNWORTHY ENTRIES ENTIRELY (do not rewrite them—just delete). -- DO NOT ADD ANY TEXT BEFORE OR AFTER THE LIST. -- The output must be only the cleaned-up list, formatted exactly the same way. -""" - response = self.llm.invoke([HumanMessage(content=prompt)]) - return response.content.strip() + return self.cleanup_chain.run({"combined_list": combined_list}).strip() def generate_worthy_commits(self, version): self.version2 = version @@ -208,4 +218,4 @@ def generate_worthy_commits(self, version): logger.info("Removing unworthy commits from the list...") combined_list = self.remove_unworthy_commits(summaries_list) - print(combined_list) + return combined_list diff --git a/scripts/release_notes_runner.py b/scripts/release_notes_runner.py index db5e1b040a..1e3d203403 100644 --- a/scripts/release_notes_runner.py +++ b/scripts/release_notes_runner.py @@ -1,5 +1,8 @@ import argparse import logging +import os + +from langchain_openai import ChatOpenAI from bugbug.tools.release_notes import ReleaseNotesGenerator @@ -14,13 +17,24 @@ def main(): "--chunk-size", type=int, default=10000, help="Chunk size for token processing" ) parser.add_argument( - "--model", default="gpt-4o", help="Model to use for summarization" + "--llm", default="openai-gpt-4o", help="Model to use for summarization" ) args = parser.parse_args() - generator = ReleaseNotesGenerator(chunk_size=args.chunk_size, model=args.model) - generator.generate_worthy_commits(version=args.version) + if args.llm.startswith("openai-"): + model_name = args.llm.replace("openai-", "") + llm = ChatOpenAI( + model=model_name, + temperature=0.1, + openai_api_key=os.environ.get("OPENAI_API_KEY"), + ) + else: + raise ValueError(f"Unsupported LLM provider: {args.llm}") + + generator = ReleaseNotesGenerator(chunk_size=args.chunk_size, llm=llm) + results = generator.generate_worthy_commits(version=args.version) + print(results) if __name__ == "__main__": From 06af4d87038764c3a72dc70630deca0799dd3e6d Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Wed, 5 Mar 2025 19:37:47 -0500 Subject: [PATCH 27/70] Cleaned up code --- bugbug/tools/release_notes.py | 13 ++++++------- scripts/release_notes_runner.py | 6 +++--- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index f0b12a0c2a..91bf3d0222 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -14,7 +14,7 @@ logger = logging.getLogger(__name__) -class ReleaseNotesGenerator: +class ReleaseNotesCommitsSelector: def __init__(self, chunk_size, llm): self.chunk_size = chunk_size self.llm = llm @@ -137,13 +137,13 @@ def split_into_chunks(self, commit_log): return chunks - def summarize_with_gpt(self, input_text): + def shortlist_with_gpt(self, input_text): return self.summarization_chain.run({"input_text": input_text}).strip() - def generate_summaries(self, commit_log_list): + def generate_commit_shortlist(self, commit_log_list): commit_log_list_combined = "\n".join(commit_log_list) chunks = self.split_into_chunks(commit_log_list_combined) - return [self.summarize_with_gpt(chunk) for chunk in chunks] + return [self.shortlist_with_gpt(chunk) for chunk in chunks] def clean_commits(self, commit_log_list, keywords): cleaned_commits = [] @@ -177,7 +177,7 @@ def remove_unworthy_commits(self, summaries): combined_list = "\n".join(summaries) return self.cleanup_chain.run({"combined_list": combined_list}).strip() - def generate_worthy_commits(self, version): + def select_worthy_commits(self, version): self.version2 = version self.version1 = self.get_previous_version(version) self.output_file = f"version_summary_{self.version2}.txt" @@ -199,7 +199,6 @@ def generate_worthy_commits(self, version): commit_log_list.append(desc) if not commit_log_list: - logger.error("No changes found.") return logger.info("Cleaning commit log...") @@ -213,7 +212,7 @@ def generate_worthy_commits(self, version): cleaned_commits = self.clean_commits(commit_log_list, keywords_to_remove) logger.info("Generating summaries for cleaned commits...") - summaries_list = self.generate_summaries(cleaned_commits) + summaries_list = self.generate_commit_shortlist(cleaned_commits) logger.info("Removing unworthy commits from the list...") combined_list = self.remove_unworthy_commits(summaries_list) diff --git a/scripts/release_notes_runner.py b/scripts/release_notes_runner.py index 1e3d203403..c4931c9334 100644 --- a/scripts/release_notes_runner.py +++ b/scripts/release_notes_runner.py @@ -4,7 +4,7 @@ from langchain_openai import ChatOpenAI -from bugbug.tools.release_notes import ReleaseNotesGenerator +from bugbug.tools.release_notes import ReleaseNotesCommitsSelector logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -32,8 +32,8 @@ def main(): else: raise ValueError(f"Unsupported LLM provider: {args.llm}") - generator = ReleaseNotesGenerator(chunk_size=args.chunk_size, llm=llm) - results = generator.generate_worthy_commits(version=args.version) + selector = ReleaseNotesCommitsSelector(chunk_size=args.chunk_size, llm=llm) + results = selector.select_worthy_commits(version=args.version) print(results) From a14ad873813d1965b9e8d42e891aeceb17cf39cb Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Wed, 5 Mar 2025 19:45:13 -0500 Subject: [PATCH 28/70] Added typings --- bugbug/tools/release_notes.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 91bf3d0222..e44ab0cb79 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -1,6 +1,7 @@ import logging import os import re +from typing import List, Optional import requests import tiktoken @@ -15,7 +16,7 @@ class ReleaseNotesCommitsSelector: - def __init__(self, chunk_size, llm): + def __init__(self, chunk_size: int, llm: LLMChain): self.chunk_size = chunk_size self.llm = llm self.summarization_prompt = PromptTemplate( @@ -98,7 +99,7 @@ def __init__(self, chunk_size, llm): prompt=self.cleanup_prompt, ) - def get_previous_version(self, current_version): + def get_previous_version(self, current_version: str) -> str: match = re.match(r"(FIREFOX_BETA_)(\d+)(_BASE)", current_version) if not match: raise ValueError("Invalid version format") @@ -106,7 +107,7 @@ def get_previous_version(self, current_version): previous_version_number = int(version_number) - 1 return f"{prefix}{previous_version_number}{suffix}" - def get_token_count(self, text): + def get_token_count(self, text: str) -> int: if hasattr(self.llm, "model_name"): model_name = self.llm.model_name else: @@ -115,10 +116,10 @@ def get_token_count(self, text): encoding = tiktoken.encoding_for_model(model_name) return len(encoding.encode(text)) - def split_into_chunks(self, commit_log): + def split_into_chunks(self, commit_log: str) -> List[str]: commit_blocks = commit_log.split("\n") chunks = [] - current_chunk = [] + current_chunk: List[str] = [] current_token_count = 0 for block in commit_blocks: @@ -137,15 +138,17 @@ def split_into_chunks(self, commit_log): return chunks - def shortlist_with_gpt(self, input_text): + def shortlist_with_gpt(self, input_text: str) -> str: return self.summarization_chain.run({"input_text": input_text}).strip() - def generate_commit_shortlist(self, commit_log_list): + def generate_commit_shortlist(self, commit_log_list: List[str]) -> List[str]: commit_log_list_combined = "\n".join(commit_log_list) chunks = self.split_into_chunks(commit_log_list_combined) return [self.shortlist_with_gpt(chunk) for chunk in chunks] - def clean_commits(self, commit_log_list, keywords): + def clean_commits( + self, commit_log_list: List[str], keywords: List[str] + ) -> List[str]: cleaned_commits = [] for block in commit_log_list: @@ -173,11 +176,11 @@ def clean_commits(self, commit_log_list, keywords): return cleaned_commits - def remove_unworthy_commits(self, summaries): + def remove_unworthy_commits(self, summaries: List[str]) -> str: combined_list = "\n".join(summaries) return self.cleanup_chain.run({"combined_list": combined_list}).strip() - def select_worthy_commits(self, version): + def select_worthy_commits(self, version: str) -> Optional[str]: self.version2 = version self.version1 = self.get_previous_version(version) self.output_file = f"version_summary_{self.version2}.txt" @@ -199,7 +202,7 @@ def select_worthy_commits(self, version): commit_log_list.append(desc) if not commit_log_list: - return + return None logger.info("Cleaning commit log...") keywords_to_remove = [ @@ -215,6 +218,4 @@ def select_worthy_commits(self, version): summaries_list = self.generate_commit_shortlist(cleaned_commits) logger.info("Removing unworthy commits from the list...") - combined_list = self.remove_unworthy_commits(summaries_list) - - return combined_list + return self.remove_unworthy_commits(summaries_list) From 20d0b6ebf2822b76f5564f97c18ceb635fc77b63 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 6 Mar 2025 13:13:31 -0500 Subject: [PATCH 29/70] Removed OpenAI --- bugbug/tools/release_notes.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index e44ab0cb79..c7c0ea0a24 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -1,5 +1,4 @@ import logging -import os import re from typing import List, Optional @@ -7,9 +6,6 @@ import tiktoken from langchain.chains import LLMChain from langchain.prompts import PromptTemplate -from openai import OpenAI - -client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) From b48089fc47e80aa00ad46da20931202f0b2eb742 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 6 Mar 2025 13:23:01 -0500 Subject: [PATCH 30/70] Changed type hints from List to list --- bugbug/tools/release_notes.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index c7c0ea0a24..366751ff7e 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -1,6 +1,6 @@ import logging import re -from typing import List, Optional +from typing import Optional import requests import tiktoken @@ -112,10 +112,10 @@ def get_token_count(self, text: str) -> int: encoding = tiktoken.encoding_for_model(model_name) return len(encoding.encode(text)) - def split_into_chunks(self, commit_log: str) -> List[str]: + def split_into_chunks(self, commit_log: str) -> list[str]: commit_blocks = commit_log.split("\n") chunks = [] - current_chunk: List[str] = [] + current_chunk: list[str] = [] current_token_count = 0 for block in commit_blocks: @@ -137,14 +137,14 @@ def split_into_chunks(self, commit_log: str) -> List[str]: def shortlist_with_gpt(self, input_text: str) -> str: return self.summarization_chain.run({"input_text": input_text}).strip() - def generate_commit_shortlist(self, commit_log_list: List[str]) -> List[str]: + def generate_commit_shortlist(self, commit_log_list: list[str]) -> list[str]: commit_log_list_combined = "\n".join(commit_log_list) chunks = self.split_into_chunks(commit_log_list_combined) return [self.shortlist_with_gpt(chunk) for chunk in chunks] def clean_commits( - self, commit_log_list: List[str], keywords: List[str] - ) -> List[str]: + self, commit_log_list: list[str], keywords: list[str] + ) -> list[str]: cleaned_commits = [] for block in commit_log_list: @@ -172,7 +172,7 @@ def clean_commits( return cleaned_commits - def remove_unworthy_commits(self, summaries: List[str]) -> str: + def remove_unworthy_commits(self, summaries: list[str]) -> str: combined_list = "\n".join(summaries) return self.cleanup_chain.run({"combined_list": combined_list}).strip() From 5bd61ade10515da0bf073cb28f61c4c94ab51b4d Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 6 Mar 2025 13:44:45 -0500 Subject: [PATCH 31/70] Removed regex search for bug id --- bugbug/tools/release_notes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 366751ff7e..a9b52fae59 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -153,7 +153,6 @@ def clean_commits( re.search(rf"\b{keyword}\b", block, re.IGNORECASE) for keyword in keywords ) - and re.search(r"Bug \d+", block, re.IGNORECASE) and not re.search( r"release\+treescript@mozilla\.org", block, re.IGNORECASE ) From 2dddedb80a2a3ba8b7cbc8fdda120ddfd4345ad2 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 6 Mar 2025 14:20:45 -0500 Subject: [PATCH 32/70] Replaced token chunking with commit chunking --- bugbug/tools/release_notes.py | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index a9b52fae59..c387dc812c 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -1,9 +1,9 @@ import logging import re +from itertools import batched from typing import Optional import requests -import tiktoken from langchain.chains import LLMChain from langchain.prompts import PromptTemplate @@ -103,34 +103,12 @@ def get_previous_version(self, current_version: str) -> str: previous_version_number = int(version_number) - 1 return f"{prefix}{previous_version_number}{suffix}" - def get_token_count(self, text: str) -> int: - if hasattr(self.llm, "model_name"): - model_name = self.llm.model_name - else: - raise ValueError("LLM model name not found.") - - encoding = tiktoken.encoding_for_model(model_name) - return len(encoding.encode(text)) - def split_into_chunks(self, commit_log: str) -> list[str]: - commit_blocks = commit_log.split("\n") + lines = commit_log.strip().split("\n") chunks = [] - current_chunk: list[str] = [] - current_token_count = 0 - - for block in commit_blocks: - block_token_count = self.get_token_count(block) - - if current_token_count + block_token_count > self.chunk_size: - chunks.append("\n\n".join(current_chunk)) - current_chunk = [] - current_token_count = 0 - - current_chunk.append(block) - current_token_count += block_token_count - if current_chunk: - chunks.append("\n\n".join(current_chunk)) + for batch in batched(lines, self.chunk_size): + chunks.append("\n".join(batch)) return chunks From 3572bd8fe3b25956c3d42472bb8a7a75277b05d4 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 6 Mar 2025 14:21:07 -0500 Subject: [PATCH 33/70] Changed chunk param to commit chunk --- scripts/release_notes_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/release_notes_runner.py b/scripts/release_notes_runner.py index c4931c9334..0f0fe00ef5 100644 --- a/scripts/release_notes_runner.py +++ b/scripts/release_notes_runner.py @@ -14,7 +14,7 @@ def main(): parser = argparse.ArgumentParser(description="Generate Firefox release notes.") parser.add_argument("--version", required=True, help="Target version identifier") parser.add_argument( - "--chunk-size", type=int, default=10000, help="Chunk size for token processing" + "--chunk-size", type=int, default=100, help="Number of commits per chunk" ) parser.add_argument( "--llm", default="openai-gpt-4o", help="Model to use for summarization" From 020fed350e261d805d90e2cbfcdf6462258d5088 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 6 Mar 2025 15:25:33 -0500 Subject: [PATCH 34/70] Renamed functions --- bugbug/tools/release_notes.py | 39 ++++++++++++++------------------- scripts/release_notes_runner.py | 2 +- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index c387dc812c..0564dfc01e 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -7,6 +7,14 @@ from langchain.chains import LLMChain from langchain.prompts import PromptTemplate +KEYWORDS_TO_REMOVE = [ + "Backed out", + "a=testonly", + "DONTBUILD", + "add tests", + "disable test", +] + logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -103,7 +111,7 @@ def get_previous_version(self, current_version: str) -> str: previous_version_number = int(version_number) - 1 return f"{prefix}{previous_version_number}{suffix}" - def split_into_chunks(self, commit_log: str) -> list[str]: + def batch_commit_logs(self, commit_log: str) -> list[str]: lines = commit_log.strip().split("\n") chunks = [] @@ -117,29 +125,24 @@ def shortlist_with_gpt(self, input_text: str) -> str: def generate_commit_shortlist(self, commit_log_list: list[str]) -> list[str]: commit_log_list_combined = "\n".join(commit_log_list) - chunks = self.split_into_chunks(commit_log_list_combined) + chunks = self.batch_commit_logs(commit_log_list_combined) return [self.shortlist_with_gpt(chunk) for chunk in chunks] - def clean_commits( - self, commit_log_list: list[str], keywords: list[str] - ) -> list[str]: + def filter_irrelevant_commits(self, commit_log_list: list[str]) -> list[str]: cleaned_commits = [] for block in commit_log_list: if ( not any( re.search(rf"\b{keyword}\b", block, re.IGNORECASE) - for keyword in keywords + for keyword in KEYWORDS_TO_REMOVE ) + and re.search(r"Bug \d+", block, re.IGNORECASE) and not re.search( r"release\+treescript@mozilla\.org", block, re.IGNORECASE ) and not re.search(r"nightly", block, re.IGNORECASE) ): - bug_id_match = re.search(r"Bug (\d+)", block, re.IGNORECASE) - if not bug_id_match: - continue - bug_position = re.search(r"Bug \d+.*", block, re.IGNORECASE) if bug_position: block = bug_position.group(0) @@ -149,14 +152,13 @@ def clean_commits( return cleaned_commits - def remove_unworthy_commits(self, summaries: list[str]) -> str: + def refine_commit_shortlist(self, summaries: list[str]) -> str: combined_list = "\n".join(summaries) return self.cleanup_chain.run({"combined_list": combined_list}).strip() - def select_worthy_commits(self, version: str) -> Optional[str]: + def get_final_release_notes_commits(self, version: str) -> Optional[str]: self.version2 = version self.version1 = self.get_previous_version(version) - self.output_file = f"version_summary_{self.version2}.txt" logger.info(f"Generating list of commits for version: {self.version2}") url = f"https://hg.mozilla.org/releases/mozilla-release/json-pushes?fromchange={self.version1}&tochange={self.version2}&full=1" @@ -178,17 +180,10 @@ def select_worthy_commits(self, version: str) -> Optional[str]: return None logger.info("Cleaning commit log...") - keywords_to_remove = [ - "Backed out", - "a=testonly", - "DONTBUILD", - "add tests", - "disable test", - ] - cleaned_commits = self.clean_commits(commit_log_list, keywords_to_remove) + cleaned_commits = self.filter_irrelevant_commits(commit_log_list) logger.info("Generating summaries for cleaned commits...") summaries_list = self.generate_commit_shortlist(cleaned_commits) logger.info("Removing unworthy commits from the list...") - return self.remove_unworthy_commits(summaries_list) + return self.refine_commit_shortlist(summaries_list) diff --git a/scripts/release_notes_runner.py b/scripts/release_notes_runner.py index 0f0fe00ef5..f2e3114524 100644 --- a/scripts/release_notes_runner.py +++ b/scripts/release_notes_runner.py @@ -33,7 +33,7 @@ def main(): raise ValueError(f"Unsupported LLM provider: {args.llm}") selector = ReleaseNotesCommitsSelector(chunk_size=args.chunk_size, llm=llm) - results = selector.select_worthy_commits(version=args.version) + results = selector.get_final_release_notes_commits(version=args.version) print(results) From 1c5cbe25bf72825b85ae398b21a5e9f8d311f6a9 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 6 Mar 2025 15:46:54 -0500 Subject: [PATCH 35/70] Fixed variable names --- bugbug/tools/release_notes.py | 55 +++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 0564dfc01e..322f60e283 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -112,13 +112,10 @@ def get_previous_version(self, current_version: str) -> str: return f"{prefix}{previous_version_number}{suffix}" def batch_commit_logs(self, commit_log: str) -> list[str]: - lines = commit_log.strip().split("\n") - chunks = [] - - for batch in batched(lines, self.chunk_size): - chunks.append("\n".join(batch)) - - return chunks + return [ + "\n".join(batch) + for batch in batched(commit_log.strip().split("\n"), self.chunk_size) + ] def shortlist_with_gpt(self, input_text: str) -> str: return self.summarization_chain.run({"input_text": input_text}).strip() @@ -156,34 +153,42 @@ def refine_commit_shortlist(self, summaries: list[str]) -> str: combined_list = "\n".join(summaries) return self.cleanup_chain.run({"combined_list": combined_list}).strip() - def get_final_release_notes_commits(self, version: str) -> Optional[str]: - self.version2 = version - self.version1 = self.get_previous_version(version) - - logger.info(f"Generating list of commits for version: {self.version2}") + def get_commit_logs(self, version: str) -> Optional[list[str]]: url = f"https://hg.mozilla.org/releases/mozilla-release/json-pushes?fromchange={self.version1}&tochange={self.version2}&full=1" response = requests.get(url) response.raise_for_status() data = response.json() - commit_log_list = [] + commit_log_list = [ + changeset["desc"].strip() + for push_data in data.values() + for changeset in push_data["changesets"] + if "desc" in changeset and changeset["desc"].strip() + ] + + return commit_log_list if commit_log_list else None - for push_data in data.values(): - changesets = push_data["changesets"] + def get_final_release_notes_commits(self, version: str) -> Optional[str]: + self.version2 = version + self.version1 = self.get_previous_version(version) - for changeset in changesets: - desc = changeset["desc"].strip() - if desc: - commit_log_list.append(desc) + logger.info(f"Generating commit shortlist for: {self.version2}") + commit_log_list = self.get_commit_logs(version) if not commit_log_list: return None - logger.info("Cleaning commit log...") - cleaned_commits = self.filter_irrelevant_commits(commit_log_list) + logger.info("Filtering irrelevant commits...") + filtered_commits = self.filter_irrelevant_commits(commit_log_list) - logger.info("Generating summaries for cleaned commits...") - summaries_list = self.generate_commit_shortlist(cleaned_commits) + if not filtered_commits: + return None + + logger.info("Generating commit shortlist...") + commit_shortlist = self.generate_commit_shortlist(filtered_commits) + + if not commit_shortlist: + return None - logger.info("Removing unworthy commits from the list...") - return self.refine_commit_shortlist(summaries_list) + logger.info("Refining commit shortlistt...") + return self.refine_commit_shortlist(commit_shortlist) From 0d173ad9400b3e2bbcb3004146643b57263640e6 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 7 Mar 2025 15:22:04 -0500 Subject: [PATCH 36/70] Changed to generator --- bugbug/tools/release_notes.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 322f60e283..6a6f7a064f 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -1,7 +1,7 @@ import logging import re from itertools import batched -from typing import Optional +from typing import Generator, Optional import requests from langchain.chains import LLMChain @@ -125,9 +125,9 @@ def generate_commit_shortlist(self, commit_log_list: list[str]) -> list[str]: chunks = self.batch_commit_logs(commit_log_list_combined) return [self.shortlist_with_gpt(chunk) for chunk in chunks] - def filter_irrelevant_commits(self, commit_log_list: list[str]) -> list[str]: - cleaned_commits = [] - + def filter_irrelevant_commits( + self, commit_log_list: list[str] + ) -> Generator[str, None, None]: for block in commit_log_list: if ( not any( @@ -145,9 +145,7 @@ def filter_irrelevant_commits(self, commit_log_list: list[str]) -> list[str]: block = bug_position.group(0) commit_summary = block - cleaned_commits.append(commit_summary) - - return cleaned_commits + yield commit_summary def refine_commit_shortlist(self, summaries: list[str]) -> str: combined_list = "\n".join(summaries) @@ -179,7 +177,7 @@ def get_final_release_notes_commits(self, version: str) -> Optional[str]: return None logger.info("Filtering irrelevant commits...") - filtered_commits = self.filter_irrelevant_commits(commit_log_list) + filtered_commits = list(self.filter_irrelevant_commits(commit_log_list)) if not filtered_commits: return None From 030d7053530c148b429a983ed53e3eef16e1dc48 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 14 Mar 2025 13:48:52 -0400 Subject: [PATCH 37/70] Removed shortlist_with_gpt function --- bugbug/tools/release_notes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 6a6f7a064f..79d98eda5b 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -117,13 +117,13 @@ def batch_commit_logs(self, commit_log: str) -> list[str]: for batch in batched(commit_log.strip().split("\n"), self.chunk_size) ] - def shortlist_with_gpt(self, input_text: str) -> str: - return self.summarization_chain.run({"input_text": input_text}).strip() - def generate_commit_shortlist(self, commit_log_list: list[str]) -> list[str]: commit_log_list_combined = "\n".join(commit_log_list) chunks = self.batch_commit_logs(commit_log_list_combined) - return [self.shortlist_with_gpt(chunk) for chunk in chunks] + return [ + self.summarization_chain.run({"input_text": chunk}).strip() + for chunk in chunks + ] def filter_irrelevant_commits( self, commit_log_list: list[str] From 6326f668c6f0916f4008080586f79e4fcbd99dfc Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 14 Mar 2025 14:01:01 -0400 Subject: [PATCH 38/70] Simplified filtering irrelevant commits --- bugbug/tools/release_notes.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 79d98eda5b..8a9e769339 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -142,10 +142,7 @@ def filter_irrelevant_commits( ): bug_position = re.search(r"Bug \d+.*", block, re.IGNORECASE) if bug_position: - block = bug_position.group(0) - - commit_summary = block - yield commit_summary + yield bug_position.group(0) def refine_commit_shortlist(self, summaries: list[str]) -> str: combined_list = "\n".join(summaries) From e25aad56c7d0ebda2322ea29b71ce0c4ab12bd60 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 14 Mar 2025 14:02:41 -0400 Subject: [PATCH 39/70] Removed refining shortlist function --- bugbug/tools/release_notes.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 8a9e769339..da06ba1a17 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -144,10 +144,6 @@ def filter_irrelevant_commits( if bug_position: yield bug_position.group(0) - def refine_commit_shortlist(self, summaries: list[str]) -> str: - combined_list = "\n".join(summaries) - return self.cleanup_chain.run({"combined_list": combined_list}).strip() - def get_commit_logs(self, version: str) -> Optional[list[str]]: url = f"https://hg.mozilla.org/releases/mozilla-release/json-pushes?fromchange={self.version1}&tochange={self.version2}&full=1" response = requests.get(url) @@ -186,4 +182,5 @@ def get_final_release_notes_commits(self, version: str) -> Optional[str]: return None logger.info("Refining commit shortlistt...") - return self.refine_commit_shortlist(commit_shortlist) + combined_list = "\n".join(commit_shortlist) + return self.cleanup_chain.run({"combined_list": combined_list}).strip() From 64185519441ee9f8f7bfdf491783c89f77161562 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 14 Mar 2025 14:33:33 -0400 Subject: [PATCH 40/70] Added author filtering --- bugbug/tools/release_notes.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index da06ba1a17..8602e091e7 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -126,32 +126,32 @@ def generate_commit_shortlist(self, commit_log_list: list[str]) -> list[str]: ] def filter_irrelevant_commits( - self, commit_log_list: list[str] + self, commit_log_list: list[tuple[str, str]] ) -> Generator[str, None, None]: - for block in commit_log_list: + for desc, author in commit_log_list: if ( not any( - re.search(rf"\b{keyword}\b", block, re.IGNORECASE) + re.search(rf"\b{keyword}\b", desc, re.IGNORECASE) for keyword in KEYWORDS_TO_REMOVE ) - and re.search(r"Bug \d+", block, re.IGNORECASE) + and re.search(r"Bug \d+", desc, re.IGNORECASE) and not re.search( - r"release\+treescript@mozilla\.org", block, re.IGNORECASE + r"release\+treescript@mozilla\.org", author, re.IGNORECASE ) - and not re.search(r"nightly", block, re.IGNORECASE) + and not re.search(r"nightly", desc, re.IGNORECASE) ): - bug_position = re.search(r"Bug \d+.*", block, re.IGNORECASE) + bug_position = re.search(r"Bug \d+.*", desc, re.IGNORECASE) if bug_position: yield bug_position.group(0) - def get_commit_logs(self, version: str) -> Optional[list[str]]: + def get_commit_logs(self) -> Optional[list[tuple[str, str]]]: url = f"https://hg.mozilla.org/releases/mozilla-release/json-pushes?fromchange={self.version1}&tochange={self.version2}&full=1" response = requests.get(url) response.raise_for_status() data = response.json() commit_log_list = [ - changeset["desc"].strip() + (changeset["desc"].strip(), changeset.get("author", "").strip()) for push_data in data.values() for changeset in push_data["changesets"] if "desc" in changeset and changeset["desc"].strip() @@ -164,7 +164,7 @@ def get_final_release_notes_commits(self, version: str) -> Optional[str]: self.version1 = self.get_previous_version(version) logger.info(f"Generating commit shortlist for: {self.version2}") - commit_log_list = self.get_commit_logs(version) + commit_log_list = self.get_commit_logs() if not commit_log_list: return None From 4140c5256e986d84ac6b5b9b898b07c4c8b29e46 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 14 Mar 2025 15:21:58 -0400 Subject: [PATCH 41/70] Added generative_model_tool --- scripts/release_notes_runner.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/scripts/release_notes_runner.py b/scripts/release_notes_runner.py index f2e3114524..85fe328063 100644 --- a/scripts/release_notes_runner.py +++ b/scripts/release_notes_runner.py @@ -1,9 +1,7 @@ import argparse import logging -import os - -from langchain_openai import ChatOpenAI +from bugbug import generative_model_tool from bugbug.tools.release_notes import ReleaseNotesCommitsSelector logging.basicConfig(level=logging.INFO) @@ -17,20 +15,11 @@ def main(): "--chunk-size", type=int, default=100, help="Number of commits per chunk" ) parser.add_argument( - "--llm", default="openai-gpt-4o", help="Model to use for summarization" + "--llm", default="openai", help="Model to use for summarization" ) args = parser.parse_args() - - if args.llm.startswith("openai-"): - model_name = args.llm.replace("openai-", "") - llm = ChatOpenAI( - model=model_name, - temperature=0.1, - openai_api_key=os.environ.get("OPENAI_API_KEY"), - ) - else: - raise ValueError(f"Unsupported LLM provider: {args.llm}") + llm = generative_model_tool.create_llm_from_args(args) selector = ReleaseNotesCommitsSelector(chunk_size=args.chunk_size, llm=llm) results = selector.get_final_release_notes_commits(version=args.version) From b10f8091687c0cfae1d2e9b0cf3542f5db451977 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 14 Mar 2025 15:36:40 -0400 Subject: [PATCH 42/70] Fixed up code --- bugbug/tools/release_notes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 8602e091e7..451a04f3b0 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -181,6 +181,6 @@ def get_final_release_notes_commits(self, version: str) -> Optional[str]: if not commit_shortlist: return None - logger.info("Refining commit shortlistt...") + logger.info("Refining commit shortlist...") combined_list = "\n".join(commit_shortlist) return self.cleanup_chain.run({"combined_list": combined_list}).strip() From c6eafb80c8388e1b451ba988e3d5c49f5a9e3158 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 14 Mar 2025 16:02:54 -0400 Subject: [PATCH 43/70] Generalized previous version function --- bugbug/tools/release_notes.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 451a04f3b0..33ba3a11ed 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -104,12 +104,17 @@ def __init__(self, chunk_size: int, llm: LLMChain): ) def get_previous_version(self, current_version: str) -> str: - match = re.match(r"(FIREFOX_BETA_)(\d+)(_BASE)", current_version) + match = re.search(r"(\d+)", current_version) if not match: - raise ValueError("Invalid version format") - prefix, version_number, suffix = match.groups() - previous_version_number = int(version_number) - 1 - return f"{prefix}{previous_version_number}{suffix}" + raise ValueError("No number found in the version string") + + number = match.group(0) + decremented_number = str(int(number) - 1) + return ( + current_version[: match.start()] + + decremented_number + + current_version[match.end() :] + ) def batch_commit_logs(self, commit_log: str) -> list[str]: return [ From 119121559e5f874262f1d59786aa8c76f8291fba Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 20 Mar 2025 20:39:21 -0400 Subject: [PATCH 44/70] Removed explicit llm arg --- scripts/release_notes_runner.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/release_notes_runner.py b/scripts/release_notes_runner.py index 85fe328063..6bf7c89eca 100644 --- a/scripts/release_notes_runner.py +++ b/scripts/release_notes_runner.py @@ -10,13 +10,11 @@ def main(): parser = argparse.ArgumentParser(description="Generate Firefox release notes.") + generative_model_tool.create_llm_to_args(parser) parser.add_argument("--version", required=True, help="Target version identifier") parser.add_argument( "--chunk-size", type=int, default=100, help="Number of commits per chunk" ) - parser.add_argument( - "--llm", default="openai", help="Model to use for summarization" - ) args = parser.parse_args() llm = generative_model_tool.create_llm_from_args(args) From 51d6d9f75e3fb76289a05099918a85efbf2dba65 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 20 Mar 2025 20:57:58 -0400 Subject: [PATCH 45/70] Replaced regex with inequality --- bugbug/tools/release_notes.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 33ba3a11ed..eb929aa86c 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -13,6 +13,8 @@ "DONTBUILD", "add tests", "disable test", + "back out", + "backout", ] logging.basicConfig(level=logging.INFO) @@ -140,9 +142,8 @@ def filter_irrelevant_commits( for keyword in KEYWORDS_TO_REMOVE ) and re.search(r"Bug \d+", desc, re.IGNORECASE) - and not re.search( - r"release\+treescript@mozilla\.org", author, re.IGNORECASE - ) + and author + != "Mozilla Releng Treescript " and not re.search(r"nightly", desc, re.IGNORECASE) ): bug_position = re.search(r"Bug \d+.*", desc, re.IGNORECASE) From 69af3869ff0e4071c7af133dcd4818977a9caa2f Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 20 Mar 2025 23:01:34 -0400 Subject: [PATCH 46/70] Added ignore commit list and specific component/product ignore list --- bugbug/tools/release_notes.py | 45 +++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index eb929aa86c..97d042ab83 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -7,6 +7,8 @@ from langchain.chains import LLMChain from langchain.prompts import PromptTemplate +from bugbug import bugzilla, db + KEYWORDS_TO_REMOVE = [ "Backed out", "a=testonly", @@ -15,6 +17,14 @@ "disable test", "back out", "backout", + "test", + "tests" "ignore-this-changeset", + "CLOSED TREE", +] + +PRODUCT_OR_COMPONENT_TO_IGNORE = [ + "Firefox Build System::Task Configuration", + "Developer Infrastructure::", ] logging.basicConfig(level=logging.INFO) @@ -24,6 +34,10 @@ class ReleaseNotesCommitsSelector: def __init__(self, chunk_size: int, llm: LLMChain): self.chunk_size = chunk_size + self.bug_dict = {} + db.download(bugzilla.BUGS_DB) + for bug in bugzilla.get_bugs(): + self.bug_dict[bug["id"]] = f"{bug['product']}::{bug['component']}" self.llm = llm self.summarization_prompt = PromptTemplate( input_variables=["input_text"], @@ -135,20 +149,41 @@ def generate_commit_shortlist(self, commit_log_list: list[str]) -> list[str]: def filter_irrelevant_commits( self, commit_log_list: list[tuple[str, str]] ) -> Generator[str, None, None]: + ignore_revs_url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/.hg-annotate-ignore-revs" + response = requests.get(ignore_revs_url) + response.raise_for_status() + raw_commits_to_ignore = response.text.strip().splitlines() + cleaned_commits_to_ignore = [ + re.sub(r"(?i)^.*?(Bug \d+.*)", r"\1", line) + for line in raw_commits_to_ignore + if re.search(r"Bug \d+", line, re.IGNORECASE) + ] for desc, author in commit_log_list: if ( not any( - re.search(rf"\b{keyword}\b", desc, re.IGNORECASE) - for keyword in KEYWORDS_TO_REMOVE + keyword.lower() in desc.lower() for keyword in KEYWORDS_TO_REMOVE ) and re.search(r"Bug \d+", desc, re.IGNORECASE) and author != "Mozilla Releng Treescript " and not re.search(r"nightly", desc, re.IGNORECASE) + and desc not in cleaned_commits_to_ignore ): - bug_position = re.search(r"Bug \d+.*", desc, re.IGNORECASE) - if bug_position: - yield bug_position.group(0) + bug_match = re.search(r"Bug (\d+)", desc, re.IGNORECASE) + if bug_match: + bug_id = int(bug_match.group(1)) + + bug = self.bug_dict.get(bug_id) + if bug: + if any( + to_ignore in bug + for to_ignore in PRODUCT_OR_COMPONENT_TO_IGNORE + ): + continue + + bug_position = re.search(r"Bug \d+.*", desc, re.IGNORECASE) + if bug_position: + yield bug_position.group(0) def get_commit_logs(self) -> Optional[list[tuple[str, str]]]: url = f"https://hg.mozilla.org/releases/mozilla-release/json-pushes?fromchange={self.version1}&tochange={self.version2}&full=1" From 88cf6318e05dc972a2fbf84703777c7abc26e7ab Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Fri, 21 Mar 2025 13:59:53 -0400 Subject: [PATCH 47/70] Addressed PR comments --- bugbug/tools/release_notes.py | 64 +++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 97d042ab83..ae4223bf68 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -17,9 +17,11 @@ "disable test", "back out", "backout", - "test", - "tests" "ignore-this-changeset", + "add test", + "added test", + "ignore-this-changeset", "CLOSED TREE", + "nightly", ] PRODUCT_OR_COMPONENT_TO_IGNORE = [ @@ -34,10 +36,12 @@ class ReleaseNotesCommitsSelector: def __init__(self, chunk_size: int, llm: LLMChain): self.chunk_size = chunk_size - self.bug_dict = {} + self.bug_id_to_component = {} db.download(bugzilla.BUGS_DB) for bug in bugzilla.get_bugs(): - self.bug_dict[bug["id"]] = f"{bug['product']}::{bug['component']}" + self.bug_id_to_component[ + bug["id"] + ] = f"{bug['product']}::{bug['component']}" self.llm = llm self.summarization_prompt = PromptTemplate( input_variables=["input_text"], @@ -147,52 +151,52 @@ def generate_commit_shortlist(self, commit_log_list: list[str]) -> list[str]: ] def filter_irrelevant_commits( - self, commit_log_list: list[tuple[str, str]] + self, commit_log_list: list[tuple[str, str, str]] ) -> Generator[str, None, None]: ignore_revs_url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/.hg-annotate-ignore-revs" response = requests.get(ignore_revs_url) response.raise_for_status() raw_commits_to_ignore = response.text.strip().splitlines() - cleaned_commits_to_ignore = [ - re.sub(r"(?i)^.*?(Bug \d+.*)", r"\1", line) + hashes_to_ignore = [ + line.split(" ", 1)[0] for line in raw_commits_to_ignore if re.search(r"Bug \d+", line, re.IGNORECASE) ] - for desc, author in commit_log_list: + + for desc, author, node in commit_log_list: + bug_match = re.search(r"(Bug (\d+).*)", desc, re.IGNORECASE) if ( not any( keyword.lower() in desc.lower() for keyword in KEYWORDS_TO_REMOVE ) - and re.search(r"Bug \d+", desc, re.IGNORECASE) + and bug_match + and re.search(r"\br=[^\s,]+", desc) and author != "Mozilla Releng Treescript " - and not re.search(r"nightly", desc, re.IGNORECASE) - and desc not in cleaned_commits_to_ignore + and node not in hashes_to_ignore ): - bug_match = re.search(r"Bug (\d+)", desc, re.IGNORECASE) - if bug_match: - bug_id = int(bug_match.group(1)) - - bug = self.bug_dict.get(bug_id) - if bug: - if any( - to_ignore in bug - for to_ignore in PRODUCT_OR_COMPONENT_TO_IGNORE - ): - continue - - bug_position = re.search(r"Bug \d+.*", desc, re.IGNORECASE) - if bug_position: - yield bug_position.group(0) - - def get_commit_logs(self) -> Optional[list[tuple[str, str]]]: + bug_id = int(bug_match.group(2)) + + bug_component = self.bug_id_to_component.get(bug_id) + if bug_component and any( + to_ignore in bug_component + for to_ignore in PRODUCT_OR_COMPONENT_TO_IGNORE + ): + continue + yield bug_match.group(1) + + def get_commit_logs(self) -> Optional[list[tuple[str, str, str]]]: url = f"https://hg.mozilla.org/releases/mozilla-release/json-pushes?fromchange={self.version1}&tochange={self.version2}&full=1" response = requests.get(url) response.raise_for_status() data = response.json() commit_log_list = [ - (changeset["desc"].strip(), changeset.get("author", "").strip()) + ( + changeset["desc"].strip(), + changeset.get("author", "").strip(), + changeset.get("node", "").strip(), + ) for push_data in data.values() for changeset in push_data["changesets"] if "desc" in changeset and changeset["desc"].strip() @@ -211,7 +215,9 @@ def get_final_release_notes_commits(self, version: str) -> Optional[str]: return None logger.info("Filtering irrelevant commits...") + print(f"num before filtering: {len(commit_log_list)}") filtered_commits = list(self.filter_irrelevant_commits(commit_log_list)) + print(f"num after filtering: {len(filtered_commits)}") if not filtered_commits: return None From 2c0a3ce84a16f76d2e7ea359055b3db2878c7868 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 24 Mar 2025 12:23:45 -0400 Subject: [PATCH 48/70] Converted list to set --- bugbug/tools/release_notes.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index ae4223bf68..c34a4ab82a 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -157,11 +157,11 @@ def filter_irrelevant_commits( response = requests.get(ignore_revs_url) response.raise_for_status() raw_commits_to_ignore = response.text.strip().splitlines() - hashes_to_ignore = [ + hashes_to_ignore = { line.split(" ", 1)[0] for line in raw_commits_to_ignore if re.search(r"Bug \d+", line, re.IGNORECASE) - ] + } for desc, author, node in commit_log_list: bug_match = re.search(r"(Bug (\d+).*)", desc, re.IGNORECASE) @@ -215,9 +215,7 @@ def get_final_release_notes_commits(self, version: str) -> Optional[str]: return None logger.info("Filtering irrelevant commits...") - print(f"num before filtering: {len(commit_log_list)}") filtered_commits = list(self.filter_irrelevant_commits(commit_log_list)) - print(f"num after filtering: {len(filtered_commits)}") if not filtered_commits: return None From 66dd826d2ac45ea132f1ca24fd3c2e0d9052a23a Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 24 Mar 2025 18:02:52 -0400 Subject: [PATCH 49/70] Added test for previous version --- tests/test_release_notes.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 tests/test_release_notes.py diff --git a/tests/test_release_notes.py b/tests/test_release_notes.py new file mode 100644 index 0000000000..c8580175d1 --- /dev/null +++ b/tests/test_release_notes.py @@ -0,0 +1,22 @@ +import argparse + +from bugbug import generative_model_tool +from bugbug.tools.release_notes import ReleaseNotesCommitsSelector + + +def test_get_previous_version(): + parser = argparse.ArgumentParser(description="Generate Firefox release notes.") + generative_model_tool.create_llm_to_args(parser) + + args = parser.parse_args() + llm = generative_model_tool.create_llm_from_args(args) + selector = ReleaseNotesCommitsSelector(chunk_size=100, llm=llm) + assert ( + selector.get_previous_version("FIREFOX_BETA_135_BASE") + == "FIREFOX_BETA_134_BASE" + ) + assert selector.get_previous_version("FIREFOX_NIGHTLY_132") == "FIREFOX_NIGHTLY_131" + assert ( + selector.get_previous_version("FIREFOX_RELEASE_130_2") + == "FIREFOX_RELEASE_129_2" + ) From f177f16fcb7a6f0d41bf2a2af9c206ac400b7a58 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Wed, 26 Mar 2025 11:54:49 -0400 Subject: [PATCH 50/70] Fixed test to not require downloading DB --- bugbug/tools/release_notes.py | 30 ++++++++++++++++-------------- tests/test_release_notes.py | 23 ++++------------------- 2 files changed, 20 insertions(+), 33 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index c34a4ab82a..dad7d7ab1f 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -29,6 +29,21 @@ "Developer Infrastructure::", ] + +def get_previous_version(current_version: str) -> str: + match = re.search(r"(\d+)", current_version) + if not match: + raise ValueError("No number found in the version string") + + number = match.group(0) + decremented_number = str(int(number) - 1) + return ( + current_version[: match.start()] + + decremented_number + + current_version[match.end() :] + ) + + logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -123,19 +138,6 @@ def __init__(self, chunk_size: int, llm: LLMChain): prompt=self.cleanup_prompt, ) - def get_previous_version(self, current_version: str) -> str: - match = re.search(r"(\d+)", current_version) - if not match: - raise ValueError("No number found in the version string") - - number = match.group(0) - decremented_number = str(int(number) - 1) - return ( - current_version[: match.start()] - + decremented_number - + current_version[match.end() :] - ) - def batch_commit_logs(self, commit_log: str) -> list[str]: return [ "\n".join(batch) @@ -206,7 +208,7 @@ def get_commit_logs(self) -> Optional[list[tuple[str, str, str]]]: def get_final_release_notes_commits(self, version: str) -> Optional[str]: self.version2 = version - self.version1 = self.get_previous_version(version) + self.version1 = get_previous_version(version) logger.info(f"Generating commit shortlist for: {self.version2}") commit_log_list = self.get_commit_logs() diff --git a/tests/test_release_notes.py b/tests/test_release_notes.py index c8580175d1..a89d419192 100644 --- a/tests/test_release_notes.py +++ b/tests/test_release_notes.py @@ -1,22 +1,7 @@ -import argparse - -from bugbug import generative_model_tool -from bugbug.tools.release_notes import ReleaseNotesCommitsSelector +from bugbug.tools.release_notes import get_previous_version def test_get_previous_version(): - parser = argparse.ArgumentParser(description="Generate Firefox release notes.") - generative_model_tool.create_llm_to_args(parser) - - args = parser.parse_args() - llm = generative_model_tool.create_llm_from_args(args) - selector = ReleaseNotesCommitsSelector(chunk_size=100, llm=llm) - assert ( - selector.get_previous_version("FIREFOX_BETA_135_BASE") - == "FIREFOX_BETA_134_BASE" - ) - assert selector.get_previous_version("FIREFOX_NIGHTLY_132") == "FIREFOX_NIGHTLY_131" - assert ( - selector.get_previous_version("FIREFOX_RELEASE_130_2") - == "FIREFOX_RELEASE_129_2" - ) + assert get_previous_version("FIREFOX_BETA_135_BASE") == "FIREFOX_BETA_134_BASE" + assert get_previous_version("FIREFOX_NIGHTLY_132") == "FIREFOX_NIGHTLY_131" + assert get_previous_version("FIREFOX_RELEASE_130_2") == "FIREFOX_RELEASE_129_2" From 1946dcaf36fdc73a1a2d784fc7d7383863724e8f Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 7 Apr 2025 14:28:50 -0400 Subject: [PATCH 51/70] Initial cloud function --- bugbug/tools/release_notes_cloud_function.py | 71 ++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 bugbug/tools/release_notes_cloud_function.py diff --git a/bugbug/tools/release_notes_cloud_function.py b/bugbug/tools/release_notes_cloud_function.py new file mode 100644 index 0000000000..c8df6de284 --- /dev/null +++ b/bugbug/tools/release_notes_cloud_function.py @@ -0,0 +1,71 @@ +import logging +import os +from types import SimpleNamespace + +import flask +import functions_framework +from google.cloud import secretmanager + +from bugbug import generative_model_tool +from bugbug.tools.release_notes import ReleaseNotesCommitsSelector + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +@functions_framework.http +def handle_release_notes(request: flask.Request) -> flask.Response: + if request.method != "GET": + return flask.Response("Only GET requests are allowed", status=405) + + version = request.args.get("version") + if not version: + return flask.Response("Missing 'version' query parameter", status=400) + + try: + openai_key = get_openai_api_key() + os.environ["OPENAI_API_KEY"] = openai_key + except Exception as e: + return flask.Response(f"Failed to load OpenAI key: {str(e)}", status=500) + + args = build_args_from_request(request) + + try: + llm = generative_model_tool.create_llm_from_args(args) + selector = ReleaseNotesCommitsSelector(chunk_size=args.chunk_size, llm=llm) + notes = selector.get_final_release_notes_commits(version=args.version) + + if not notes: + return flask.Response("No user-facing commits found.", status=404) + + return flask.Response(notes, mimetype="text/plain") + except Exception as e: + logger.exception("Failed to generate release notes") + return flask.Response(f"Internal Server Error: {str(e)}", status=500) + + +def build_args_from_request(request: flask.Request): + def get(key, default=None, type_fn=str): + value = request.args.get(key) + return type_fn(value) if value is not None else default + + return SimpleNamespace( + provider=get("provider", default="openai"), + temperature=get("temperature", default=0.0, type_fn=float), + model=get("model", default=None), + api_base=get("api_base", default=None), + api_key=os.environ.get("OPENAI_API_KEY"), + version=get("version"), + chunk_size=get("chunk_size", default=100, type_fn=int), + ) + + +def get_openai_api_key(): + client = secretmanager.SecretManagerServiceClient() + project_id = os.environ["GCP_PROJECT"] + secret_name = "OPENAI_API_KEY" + version = "latest" + + name = f"projects/{project_id}/secrets/{secret_name}/versions/{version}" + response = client.access_secret_version(name=name) + return response.payload.data.decode("UTF-8") From 75848d9bb6d650b84070daeb1aafd3f74d9fc4d7 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 7 Apr 2025 15:03:43 -0400 Subject: [PATCH 52/70] Moved cloud function file to functions folder --- .../release_notes/main.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename bugbug/tools/release_notes_cloud_function.py => functions/release_notes/main.py (100%) diff --git a/bugbug/tools/release_notes_cloud_function.py b/functions/release_notes/main.py similarity index 100% rename from bugbug/tools/release_notes_cloud_function.py rename to functions/release_notes/main.py From 284c6f26969e3821da6275a5fa7d6b326f775acb Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 7 Apr 2025 15:26:16 -0400 Subject: [PATCH 53/70] Added requirements --- functions/release_notes/main.py | 16 ++-------------- functions/release_notes/requirements.txt | 6 ++++++ 2 files changed, 8 insertions(+), 14 deletions(-) create mode 100644 functions/release_notes/requirements.txt diff --git a/functions/release_notes/main.py b/functions/release_notes/main.py index c8df6de284..7d419e2172 100644 --- a/functions/release_notes/main.py +++ b/functions/release_notes/main.py @@ -4,10 +4,10 @@ import flask import functions_framework -from google.cloud import secretmanager from bugbug import generative_model_tool from bugbug.tools.release_notes import ReleaseNotesCommitsSelector +from bugbug.utils import get_secret logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -23,8 +23,7 @@ def handle_release_notes(request: flask.Request) -> flask.Response: return flask.Response("Missing 'version' query parameter", status=400) try: - openai_key = get_openai_api_key() - os.environ["OPENAI_API_KEY"] = openai_key + os.environ["OPENAI_API_KEY"] = get_secret("OPENAI_API_KEY") except Exception as e: return flask.Response(f"Failed to load OpenAI key: {str(e)}", status=500) @@ -58,14 +57,3 @@ def get(key, default=None, type_fn=str): version=get("version"), chunk_size=get("chunk_size", default=100, type_fn=int), ) - - -def get_openai_api_key(): - client = secretmanager.SecretManagerServiceClient() - project_id = os.environ["GCP_PROJECT"] - secret_name = "OPENAI_API_KEY" - version = "latest" - - name = f"projects/{project_id}/secrets/{secret_name}/versions/{version}" - response = client.access_secret_version(name=name) - return response.payload.data.decode("UTF-8") diff --git a/functions/release_notes/requirements.txt b/functions/release_notes/requirements.txt new file mode 100644 index 0000000000..f89a35177e --- /dev/null +++ b/functions/release_notes/requirements.txt @@ -0,0 +1,6 @@ +bugbug +Flask==2.2.5 +functions-framework==3.5.0 +langchain +openai +requests From 89bac35d4df55f12ae95caebc0aad2dbe516a1d8 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 7 Apr 2025 15:31:09 -0400 Subject: [PATCH 54/70] Fixed args --- functions/release_notes/main.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/functions/release_notes/main.py b/functions/release_notes/main.py index 7d419e2172..30fcde72f5 100644 --- a/functions/release_notes/main.py +++ b/functions/release_notes/main.py @@ -48,12 +48,17 @@ def get(key, default=None, type_fn=str): value = request.args.get(key) return type_fn(value) if value is not None else default - return SimpleNamespace( - provider=get("provider", default="openai"), - temperature=get("temperature", default=0.0, type_fn=float), - model=get("model", default=None), - api_base=get("api_base", default=None), - api_key=os.environ.get("OPENAI_API_KEY"), - version=get("version"), - chunk_size=get("chunk_size", default=100, type_fn=int), - ) + llm = get("llm", default="openai") + + args = { + "llm": llm, + "version": get("version"), + "chunk_size": get("chunk_size", default=100, type_fn=int), + } + + # Dynamically add model-specific args like openai_temperature, etc. + for arg_name in request.args: + if arg_name.startswith(f"{llm}_"): + args[arg_name] = request.args.get(arg_name) + + return SimpleNamespace(**args) From ff62313213c69f7096d3124514a4c03a83ba090b Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 7 Apr 2025 15:35:33 -0400 Subject: [PATCH 55/70] Fixed args --- functions/release_notes/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/functions/release_notes/main.py b/functions/release_notes/main.py index 30fcde72f5..ba166eed68 100644 --- a/functions/release_notes/main.py +++ b/functions/release_notes/main.py @@ -56,7 +56,6 @@ def get(key, default=None, type_fn=str): "chunk_size": get("chunk_size", default=100, type_fn=int), } - # Dynamically add model-specific args like openai_temperature, etc. for arg_name in request.args: if arg_name.startswith(f"{llm}_"): args[arg_name] = request.args.get(arg_name) From 4a042fc41cde53a65a507a6c43639139e5c45241 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 8 Apr 2025 15:33:31 -0400 Subject: [PATCH 56/70] Added workflow to deploy --- functions/release_notes/deploy.yml | 43 ++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 functions/release_notes/deploy.yml diff --git a/functions/release_notes/deploy.yml b/functions/release_notes/deploy.yml new file mode 100644 index 0000000000..3375daeea5 --- /dev/null +++ b/functions/release_notes/deploy.yml @@ -0,0 +1,43 @@ +name: Deploy Release Notes Function + +on: + workflow_dispatch: + workflow_run: + workflows: [Test Backend] + branches: [main] + types: [completed] + +jobs: + deploy: + runs-on: ubuntu-latest + if: ${{ github.event.workflow_run.conclusion == 'success' }} + permissions: + contents: read + id-token: write + + steps: + - uses: actions/checkout@v4 + + - name: Google Cloud Auth + id: auth + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GCP_SA_CREDENTIALS }} + + - name: Set up gcloud + uses: google-github-actions/setup-gcloud@v2 + + - name: Deploy to Cloud Functions + working-directory: functions/release_notes + run: | + gcloud functions deploy release-notes \ + --gen2 \ + --trigger-http \ + --allow-unauthenticated \ + --region=us-central1 \ + --timeout=240 \ + --memory=2Gi \ + --runtime=python311 \ + --entry-point=handle_release_notes \ + --service-account=review-helper@moz-bugbug.iam.gserviceaccount.com \ + --set-secrets=OPENAI_API_KEY=openai-api-key:latest From fbb46ad21529fbb7a2cb2e2f597d29f9fcd867c2 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 10 Apr 2025 10:22:32 -0400 Subject: [PATCH 57/70] Moved workflow file and fixed to trigger every tag rather than every commit --- .../deploy.yml => .github/workflows/release_notes.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) rename functions/release_notes/deploy.yml => .github/workflows/release_notes.yml (85%) diff --git a/functions/release_notes/deploy.yml b/.github/workflows/release_notes.yml similarity index 85% rename from functions/release_notes/deploy.yml rename to .github/workflows/release_notes.yml index 3375daeea5..16f951f6fe 100644 --- a/functions/release_notes/deploy.yml +++ b/.github/workflows/release_notes.yml @@ -1,16 +1,14 @@ name: Deploy Release Notes Function on: - workflow_dispatch: - workflow_run: - workflows: [Test Backend] - branches: [main] - types: [completed] + push: + tags: + - "*" jobs: deploy: runs-on: ubuntu-latest - if: ${{ github.event.workflow_run.conclusion == 'success' }} + permissions: contents: read id-token: write From 2c5d73c1df6941b544cb751cbb6573969897514c Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 10 Apr 2025 17:23:47 -0400 Subject: [PATCH 58/70] Addressed PR comments --- bugbug/generative_model_tool.py | 22 ++++++++++++++ functions/release_notes/main.py | 52 +++++++++------------------------ 2 files changed, 35 insertions(+), 39 deletions(-) diff --git a/bugbug/generative_model_tool.py b/bugbug/generative_model_tool.py index d731319133..cc528bbda5 100644 --- a/bugbug/generative_model_tool.py +++ b/bugbug/generative_model_tool.py @@ -147,6 +147,28 @@ def create_llm_from_args(args): return globals()[f"create_{args.llm}_llm"](**llm_creation_args) +def create_llm_from_request(llm_name, request_args): + if llm_name not in AVAILABLE_LLMS: + raise NotImplementedError(f"LLM '{llm_name}' is not supported") + + expected_args = AVAILABLE_LLMS[llm_name] + llm_creation_args = {} + + for param_name in expected_args: + value = request_args.get(param_name) + if value is not None: + param = expected_args[param_name] + type_fn = ( + param.annotation if param.annotation != inspect.Parameter.empty else str + ) + try: + llm_creation_args[param_name] = type_fn(value) + except Exception: + llm_creation_args[param_name] = value + + return globals()[f"create_{llm_name}_llm"](**llm_creation_args) + + def get_tokenizer(model_name): import tiktoken diff --git a/functions/release_notes/main.py b/functions/release_notes/main.py index ba166eed68..8067df124e 100644 --- a/functions/release_notes/main.py +++ b/functions/release_notes/main.py @@ -1,6 +1,5 @@ import logging import os -from types import SimpleNamespace import flask import functions_framework @@ -14,50 +13,25 @@ @functions_framework.http -def handle_release_notes(request: flask.Request) -> flask.Response: +def handle_release_notes(request: flask.Request): if request.method != "GET": - return flask.Response("Only GET requests are allowed", status=405) + return "Only GET requests are allowed", 405 version = request.args.get("version") if not version: - return flask.Response("Missing 'version' query parameter", status=400) + return "Missing 'version' query parameter", 400 - try: - os.environ["OPENAI_API_KEY"] = get_secret("OPENAI_API_KEY") - except Exception as e: - return flask.Response(f"Failed to load OpenAI key: {str(e)}", status=500) + os.environ["OPENAI_API_KEY"] = get_secret("OPENAI_API_KEY") - args = build_args_from_request(request) - - try: - llm = generative_model_tool.create_llm_from_args(args) - selector = ReleaseNotesCommitsSelector(chunk_size=args.chunk_size, llm=llm) - notes = selector.get_final_release_notes_commits(version=args.version) - - if not notes: - return flask.Response("No user-facing commits found.", status=404) - - return flask.Response(notes, mimetype="text/plain") - except Exception as e: - logger.exception("Failed to generate release notes") - return flask.Response(f"Internal Server Error: {str(e)}", status=500) - - -def build_args_from_request(request: flask.Request): - def get(key, default=None, type_fn=str): - value = request.args.get(key) - return type_fn(value) if value is not None else default - - llm = get("llm", default="openai") + llm_name = request.args.get("llm", "openai") + chunk_size = int(request.args.get("chunk_size", 100)) + version = request.args.get("version") - args = { - "llm": llm, - "version": get("version"), - "chunk_size": get("chunk_size", default=100, type_fn=int), - } + llm = generative_model_tool.create_llm_from_request(llm_name, request.args) + selector = ReleaseNotesCommitsSelector(chunk_size=chunk_size, llm=llm) + notes = selector.get_final_release_notes_commits(version=version) - for arg_name in request.args: - if arg_name.startswith(f"{llm}_"): - args[arg_name] = request.args.get(arg_name) + if not notes: + return "No user-facing commits found.", 404 - return SimpleNamespace(**args) + return notes, 200, {"Content-Type": "text/plain"} From bf239d00d9afc46d5827234cbb97e2a1f87527a6 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 10 Apr 2025 17:26:00 -0400 Subject: [PATCH 59/70] Addressed PR comments --- .github/workflows/release_notes.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/release_notes.yml b/.github/workflows/release_notes.yml index 16f951f6fe..d23cdaa3cc 100644 --- a/.github/workflows/release_notes.yml +++ b/.github/workflows/release_notes.yml @@ -1,9 +1,6 @@ name: Deploy Release Notes Function -on: - push: - tags: - - "*" +on: workflow_dispatch jobs: deploy: From c79890e7345252a8ffcafa3928418f9a53de5b09 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 10 Apr 2025 17:41:39 -0400 Subject: [PATCH 60/70] Addressed PR comments --- functions/release_notes/main.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/functions/release_notes/main.py b/functions/release_notes/main.py index 8067df124e..99382f77b2 100644 --- a/functions/release_notes/main.py +++ b/functions/release_notes/main.py @@ -11,6 +11,15 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +os.environ["OPENAI_API_KEY"] = get_secret("OPENAI_API_KEY") + +llm_cache = { + "llm_name": None, + "llm": None, + "chunk_size": None, + "selector": None, +} + @functions_framework.http def handle_release_notes(request: flask.Request): @@ -21,14 +30,25 @@ def handle_release_notes(request: flask.Request): if not version: return "Missing 'version' query parameter", 400 - os.environ["OPENAI_API_KEY"] = get_secret("OPENAI_API_KEY") - llm_name = request.args.get("llm", "openai") chunk_size = int(request.args.get("chunk_size", 100)) - version = request.args.get("version") - llm = generative_model_tool.create_llm_from_request(llm_name, request.args) - selector = ReleaseNotesCommitsSelector(chunk_size=chunk_size, llm=llm) + if ( + llm_cache["llm"] is None + or llm_cache["llm_name"] != llm_name + or llm_cache["chunk_size"] != chunk_size + ): + logger.info("Initializing new LLM and selector...") + llm_cache["llm_name"] = llm_name + llm_cache["llm"] = generative_model_tool.create_llm_from_request( + llm_name, request.args + ) + llm_cache["chunk_size"] = chunk_size + llm_cache["selector"] = ReleaseNotesCommitsSelector( + chunk_size=chunk_size, llm=llm_cache["llm"] + ) + + selector = llm_cache["selector"] notes = selector.get_final_release_notes_commits(version=version) if not notes: From b72d217017431c7ba8ffcee08ed5921adec68238 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 10 Apr 2025 17:50:15 -0400 Subject: [PATCH 61/70] Added explicit deduplication --- bugbug/tools/release_notes.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index dad7d7ab1f..a17d907e5a 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -206,6 +206,19 @@ def get_commit_logs(self) -> Optional[list[tuple[str, str, str]]]: return commit_log_list if commit_log_list else None + def remove_duplicate_bugs(self, csv_text: str) -> str: + seen = set() + unique_lines = [] + for line in csv_text.strip().splitlines(): + parts = line.split(",", 3) + if len(parts) < 3: + continue + bug_id = parts[2].strip() + if bug_id not in seen: + seen.add(bug_id) + unique_lines.append(line) + return "\n".join(unique_lines) + def get_final_release_notes_commits(self, version: str) -> Optional[str]: self.version2 = version self.version1 = get_previous_version(version) @@ -230,4 +243,8 @@ def get_final_release_notes_commits(self, version: str) -> Optional[str]: logger.info("Refining commit shortlist...") combined_list = "\n".join(commit_shortlist) - return self.cleanup_chain.run({"combined_list": combined_list}).strip() + cleaned = self.cleanup_chain.run({"combined_list": combined_list}).strip() + + logger.info("Removing duplicates...") + deduped = self.remove_duplicate_bugs(cleaned) + return deduped From 3e9c7f71f9e5ec3970ad22716de15151a77bfe12 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Mon, 14 Apr 2025 12:45:20 -0400 Subject: [PATCH 62/70] Hard coded llm name and chunk size --- functions/release_notes/main.py | 42 +++++++++++++-------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/functions/release_notes/main.py b/functions/release_notes/main.py index 99382f77b2..5eaeb5ff86 100644 --- a/functions/release_notes/main.py +++ b/functions/release_notes/main.py @@ -13,16 +13,16 @@ os.environ["OPENAI_API_KEY"] = get_secret("OPENAI_API_KEY") -llm_cache = { - "llm_name": None, - "llm": None, - "chunk_size": None, - "selector": None, -} +tool: ReleaseNotesCommitsSelector | None = None + +DEFAULT_LLM_NAME = "openai" +DEFAULT_CHUNK_SIZE = 1000 @functions_framework.http def handle_release_notes(request: flask.Request): + global tool + if request.method != "GET": return "Only GET requests are allowed", 405 @@ -30,28 +30,20 @@ def handle_release_notes(request: flask.Request): if not version: return "Missing 'version' query parameter", 400 - llm_name = request.args.get("llm", "openai") - chunk_size = int(request.args.get("chunk_size", 100)) - if ( - llm_cache["llm"] is None - or llm_cache["llm_name"] != llm_name - or llm_cache["chunk_size"] != chunk_size + tool is None + or tool.llm_name != DEFAULT_LLM_NAME + or tool.chunk_size != DEFAULT_CHUNK_SIZE ): - logger.info("Initializing new LLM and selector...") - llm_cache["llm_name"] = llm_name - llm_cache["llm"] = generative_model_tool.create_llm_from_request( - llm_name, request.args - ) - llm_cache["chunk_size"] = chunk_size - llm_cache["selector"] = ReleaseNotesCommitsSelector( - chunk_size=chunk_size, llm=llm_cache["llm"] - ) - - selector = llm_cache["selector"] - notes = selector.get_final_release_notes_commits(version=version) + logger.info("Initializing new ReleaseNotesCommitsSelector...") + llm = generative_model_tool.create_llm_from_request(DEFAULT_LLM_NAME, {}) + tool = ReleaseNotesCommitsSelector(chunk_size=DEFAULT_CHUNK_SIZE, llm=llm) + tool.llm_name = DEFAULT_LLM_NAME + tool.chunk_size = DEFAULT_CHUNK_SIZE + + notes = tool.get_final_release_notes_commits(version=version) if not notes: - return "No user-facing commits found.", 404 + return "", 200, {"Content-Type": "text/plain"} return notes, 200, {"Content-Type": "text/plain"} From 0da3e8a020c2776d86b73d8535c0bfdcf2ac76f6 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 15 Apr 2025 14:29:44 -0400 Subject: [PATCH 63/70] Changed output to be a list and JSON --- bugbug/tools/release_notes.py | 4 ++-- functions/release_notes/main.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index a17d907e5a..51d1cd2317 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -219,7 +219,7 @@ def remove_duplicate_bugs(self, csv_text: str) -> str: unique_lines.append(line) return "\n".join(unique_lines) - def get_final_release_notes_commits(self, version: str) -> Optional[str]: + def get_final_release_notes_commits(self, version: str) -> Optional[list[str]]: self.version2 = version self.version1 = get_previous_version(version) @@ -247,4 +247,4 @@ def get_final_release_notes_commits(self, version: str) -> Optional[str]: logger.info("Removing duplicates...") deduped = self.remove_duplicate_bugs(cleaned) - return deduped + return deduped.splitlines() diff --git a/functions/release_notes/main.py b/functions/release_notes/main.py index 5eaeb5ff86..c072c200d9 100644 --- a/functions/release_notes/main.py +++ b/functions/release_notes/main.py @@ -44,6 +44,6 @@ def handle_release_notes(request: flask.Request): notes = tool.get_final_release_notes_commits(version=version) if not notes: - return "", 200, {"Content-Type": "text/plain"} + return {"commits": []}, 200, {"Content-Type": "application/json"} - return notes, 200, {"Content-Type": "text/plain"} + return {"commits": notes}, 200, {"Content-Type": "application/json"} From 1d6ecc6187ed273e1c7a83cb1fecc7d8cad6d763 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 15 Apr 2025 21:55:44 -0400 Subject: [PATCH 64/70] Addressed PR comments --- functions/release_notes/main.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/functions/release_notes/main.py b/functions/release_notes/main.py index c072c200d9..bfb011760c 100644 --- a/functions/release_notes/main.py +++ b/functions/release_notes/main.py @@ -30,11 +30,7 @@ def handle_release_notes(request: flask.Request): if not version: return "Missing 'version' query parameter", 400 - if ( - tool is None - or tool.llm_name != DEFAULT_LLM_NAME - or tool.chunk_size != DEFAULT_CHUNK_SIZE - ): + if tool is None: logger.info("Initializing new ReleaseNotesCommitsSelector...") llm = generative_model_tool.create_llm_from_request(DEFAULT_LLM_NAME, {}) tool = ReleaseNotesCommitsSelector(chunk_size=DEFAULT_CHUNK_SIZE, llm=llm) @@ -44,6 +40,6 @@ def handle_release_notes(request: flask.Request): notes = tool.get_final_release_notes_commits(version=version) if not notes: - return {"commits": []}, 200, {"Content-Type": "application/json"} + return {"commits": []}, 200 - return {"commits": notes}, 200, {"Content-Type": "application/json"} + return {"commits": notes}, 200 From 9b07b5b039f7e8ebfcb9bae36a8110f07e211a68 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 15 Apr 2025 22:06:50 -0400 Subject: [PATCH 65/70] Simplified LLM creation --- bugbug/generative_model_tool.py | 22 ---------------------- functions/release_notes/main.py | 6 +++--- 2 files changed, 3 insertions(+), 25 deletions(-) diff --git a/bugbug/generative_model_tool.py b/bugbug/generative_model_tool.py index cc528bbda5..d731319133 100644 --- a/bugbug/generative_model_tool.py +++ b/bugbug/generative_model_tool.py @@ -147,28 +147,6 @@ def create_llm_from_args(args): return globals()[f"create_{args.llm}_llm"](**llm_creation_args) -def create_llm_from_request(llm_name, request_args): - if llm_name not in AVAILABLE_LLMS: - raise NotImplementedError(f"LLM '{llm_name}' is not supported") - - expected_args = AVAILABLE_LLMS[llm_name] - llm_creation_args = {} - - for param_name in expected_args: - value = request_args.get(param_name) - if value is not None: - param = expected_args[param_name] - type_fn = ( - param.annotation if param.annotation != inspect.Parameter.empty else str - ) - try: - llm_creation_args[param_name] = type_fn(value) - except Exception: - llm_creation_args[param_name] = value - - return globals()[f"create_{llm_name}_llm"](**llm_creation_args) - - def get_tokenizer(model_name): import tiktoken diff --git a/functions/release_notes/main.py b/functions/release_notes/main.py index bfb011760c..c010483546 100644 --- a/functions/release_notes/main.py +++ b/functions/release_notes/main.py @@ -15,7 +15,6 @@ tool: ReleaseNotesCommitsSelector | None = None -DEFAULT_LLM_NAME = "openai" DEFAULT_CHUNK_SIZE = 1000 @@ -32,9 +31,10 @@ def handle_release_notes(request: flask.Request): if tool is None: logger.info("Initializing new ReleaseNotesCommitsSelector...") - llm = generative_model_tool.create_llm_from_request(DEFAULT_LLM_NAME, {}) + + llm = generative_model_tool.create_openai_llm() tool = ReleaseNotesCommitsSelector(chunk_size=DEFAULT_CHUNK_SIZE, llm=llm) - tool.llm_name = DEFAULT_LLM_NAME + tool.llm_name = "openai" tool.chunk_size = DEFAULT_CHUNK_SIZE notes = tool.get_final_release_notes_commits(version=version) From e852e9d9bb9714e55fe83189c984a17e6a2330ab Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Thu, 17 Apr 2025 11:02:21 -0400 Subject: [PATCH 66/70] Replaced DB with Bugzilla calls --- bugbug/tools/release_notes.py | 36 +++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 51d1cd2317..7cb3ebd7ff 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -1,13 +1,12 @@ import logging import re from itertools import batched -from typing import Generator, Optional +from typing import Iterator, Optional import requests from langchain.chains import LLMChain from langchain.prompts import PromptTemplate - -from bugbug import bugzilla, db +from libmozdata.bugzilla import Bugzilla KEYWORDS_TO_REMOVE = [ "Backed out", @@ -44,6 +43,21 @@ def get_previous_version(current_version: str) -> str: ) +def fetch_bug_components(bug_ids: list[int]) -> dict[int, str]: + bug_id_to_component = {} + + def bug_handler(bug): + bug_id_to_component[bug["id"]] = f"{bug['product']}::{bug['component']}" + + Bugzilla( + bugids=bug_ids, + include_fields=["id", "product", "component"], + bughandler=bug_handler, + ).wait() + + return bug_id_to_component + + logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -51,12 +65,7 @@ def get_previous_version(current_version: str) -> str: class ReleaseNotesCommitsSelector: def __init__(self, chunk_size: int, llm: LLMChain): self.chunk_size = chunk_size - self.bug_id_to_component = {} - db.download(bugzilla.BUGS_DB) - for bug in bugzilla.get_bugs(): - self.bug_id_to_component[ - bug["id"] - ] = f"{bug['product']}::{bug['component']}" + self.bug_id_to_component: dict[int, str] = {} self.llm = llm self.summarization_prompt = PromptTemplate( input_variables=["input_text"], @@ -154,7 +163,7 @@ def generate_commit_shortlist(self, commit_log_list: list[str]) -> list[str]: def filter_irrelevant_commits( self, commit_log_list: list[tuple[str, str, str]] - ) -> Generator[str, None, None]: + ) -> Iterator[str]: ignore_revs_url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/.hg-annotate-ignore-revs" response = requests.get(ignore_revs_url) response.raise_for_status() @@ -230,6 +239,13 @@ def get_final_release_notes_commits(self, version: str) -> Optional[list[str]]: return None logger.info("Filtering irrelevant commits...") + bug_ids = [] + for desc, _, _ in commit_log_list: + match = re.search(r"Bug (\d+)", desc, re.IGNORECASE) + if match: + bug_ids.append(int(match.group(1))) + + self.bug_id_to_component = fetch_bug_components(bug_ids) filtered_commits = list(self.filter_irrelevant_commits(commit_log_list)) if not filtered_commits: From 11a6444800631fbf8c9322c21d9b9c54be607190 Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Wed, 23 Apr 2025 13:47:43 -0400 Subject: [PATCH 67/70] Addressed PR comments --- bugbug/tools/release_notes.py | 32 +++++++++++------------- functions/release_notes/main.py | 18 +------------ functions/release_notes/requirements.txt | 5 +--- 3 files changed, 16 insertions(+), 39 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 7cb3ebd7ff..a1ee16a9f8 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -29,17 +29,17 @@ ] -def get_previous_version(current_version: str) -> str: - match = re.search(r"(\d+)", current_version) +def get_previous_version(target_version: str) -> str: + match = re.search(r"(\d+)", target_version) if not match: raise ValueError("No number found in the version string") number = match.group(0) decremented_number = str(int(number) - 1) return ( - current_version[: match.start()] + target_version[: match.start()] + decremented_number - + current_version[match.end() :] + + target_version[match.end() :] ) @@ -196,11 +196,12 @@ def filter_irrelevant_commits( continue yield bug_match.group(1) - def get_commit_logs(self) -> Optional[list[tuple[str, str, str]]]: - url = f"https://hg.mozilla.org/releases/mozilla-release/json-pushes?fromchange={self.version1}&tochange={self.version2}&full=1" + def get_commit_logs( + self, preceding_version: str, target_version: str + ) -> Optional[list[tuple[str, str, str]]]: + url = f"https://hg.mozilla.org/releases/mozilla-release/json-pushes?fromchange={preceding_version}&tochange={target_version}&full=1" response = requests.get(url) response.raise_for_status() - data = response.json() commit_log_list = [ ( @@ -212,7 +213,6 @@ def get_commit_logs(self) -> Optional[list[tuple[str, str, str]]]: for changeset in push_data["changesets"] if "desc" in changeset and changeset["desc"].strip() ] - return commit_log_list if commit_log_list else None def remove_duplicate_bugs(self, csv_text: str) -> str: @@ -228,17 +228,16 @@ def remove_duplicate_bugs(self, csv_text: str) -> str: unique_lines.append(line) return "\n".join(unique_lines) - def get_final_release_notes_commits(self, version: str) -> Optional[list[str]]: - self.version2 = version - self.version1 = get_previous_version(version) - - logger.info(f"Generating commit shortlist for: {self.version2}") - commit_log_list = self.get_commit_logs() + def get_final_release_notes_commits( + self, target_version: str + ) -> Optional[list[str]]: + preceding_version = get_previous_version(target_version) + logger.info(f"Generating commit shortlist for: {target_version}") + commit_log_list = self.get_commit_logs(preceding_version, target_version) if not commit_log_list: return None - logger.info("Filtering irrelevant commits...") bug_ids = [] for desc, _, _ in commit_log_list: match = re.search(r"Bug (\d+)", desc, re.IGNORECASE) @@ -251,16 +250,13 @@ def get_final_release_notes_commits(self, version: str) -> Optional[list[str]]: if not filtered_commits: return None - logger.info("Generating commit shortlist...") commit_shortlist = self.generate_commit_shortlist(filtered_commits) if not commit_shortlist: return None - logger.info("Refining commit shortlist...") combined_list = "\n".join(commit_shortlist) cleaned = self.cleanup_chain.run({"combined_list": combined_list}).strip() - logger.info("Removing duplicates...") deduped = self.remove_duplicate_bugs(cleaned) return deduped.splitlines() diff --git a/functions/release_notes/main.py b/functions/release_notes/main.py index c010483546..275deb696c 100644 --- a/functions/release_notes/main.py +++ b/functions/release_notes/main.py @@ -1,17 +1,8 @@ -import logging -import os - import flask import functions_framework from bugbug import generative_model_tool from bugbug.tools.release_notes import ReleaseNotesCommitsSelector -from bugbug.utils import get_secret - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -os.environ["OPENAI_API_KEY"] = get_secret("OPENAI_API_KEY") tool: ReleaseNotesCommitsSelector | None = None @@ -30,16 +21,9 @@ def handle_release_notes(request: flask.Request): return "Missing 'version' query parameter", 400 if tool is None: - logger.info("Initializing new ReleaseNotesCommitsSelector...") - llm = generative_model_tool.create_openai_llm() tool = ReleaseNotesCommitsSelector(chunk_size=DEFAULT_CHUNK_SIZE, llm=llm) - tool.llm_name = "openai" - tool.chunk_size = DEFAULT_CHUNK_SIZE - - notes = tool.get_final_release_notes_commits(version=version) - if not notes: - return {"commits": []}, 200 + notes = tool.get_final_release_notes_commits(target_version=version) return {"commits": notes}, 200 diff --git a/functions/release_notes/requirements.txt b/functions/release_notes/requirements.txt index f89a35177e..cbc45fa128 100644 --- a/functions/release_notes/requirements.txt +++ b/functions/release_notes/requirements.txt @@ -1,6 +1,3 @@ -bugbug +bugbug==0.0.573 Flask==2.2.5 functions-framework==3.5.0 -langchain -openai -requests From aebee0ae86ea27d28ae80769d788bc71fd3771be Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Sun, 27 Apr 2025 17:04:34 -0400 Subject: [PATCH 68/70] Addressed PR comments --- bugbug/tools/release_notes.py | 73 +++++++++++++++++---------------- functions/release_notes/main.py | 4 +- 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index a1ee16a9f8..bfbfcc7c25 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -68,7 +68,7 @@ def __init__(self, chunk_size: int, llm: LLMChain): self.bug_id_to_component: dict[int, str] = {} self.llm = llm self.summarization_prompt = PromptTemplate( - input_variables=["input_text"], + input_variables=["commit_list"], template="""You are an expert in writing Firefox release notes. Your task is to analyze a list of commits and identify important user-facing changes. Follow these steps: 1. Must Include Only Meaningful Changes: @@ -98,14 +98,14 @@ def __init__(self, chunk_size: int, llm: LLMChain): 4. Select Only the Top 10 Commits: - If there are more than 10 relevant commits, choose the most impactful ones. -5. Input: - Here is the chunk of commit logs you need to focus on: - {input_text} - -6. Output Requirements: +5. Output Requirements: - Output must be raw CSV text—no formatting, no extra text. - Do not wrap the output in triple backticks (` ``` `) or use markdown formatting. - Do not include the words "CSV" or any headers—just the data. + +6. Input: + Here is the list of commits you need to focus on: + {commit_list} """, ) @@ -131,14 +131,14 @@ def __init__(self, chunk_size: int, llm: LLMChain): - Obscure web compatibility changes that apply only to edge-case websites. - Duplicate entries or similar changes that were already listed. -Here is the list to filter: -{combined_list} - Instructions: - KEEP THE SAME FORMAT (do not change the structure of entries that remain). - REMOVE UNWORTHY ENTRIES ENTIRELY (do not rewrite them—just delete). - DO NOT ADD ANY TEXT BEFORE OR AFTER THE LIST. - The output must be only the cleaned-up list, formatted exactly the same way. + +Here is the list to filter: +{combined_list} """, ) @@ -157,13 +157,11 @@ def generate_commit_shortlist(self, commit_log_list: list[str]) -> list[str]: commit_log_list_combined = "\n".join(commit_log_list) chunks = self.batch_commit_logs(commit_log_list_combined) return [ - self.summarization_chain.run({"input_text": chunk}).strip() + self.summarization_chain.run({"commit_list": chunk}).strip() for chunk in chunks ] - def filter_irrelevant_commits( - self, commit_log_list: list[tuple[str, str, str]] - ) -> Iterator[str]: + def filter_irrelevant_commits(self, commit_log_list: list[dict]) -> Iterator[str]: ignore_revs_url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/.hg-annotate-ignore-revs" response = requests.get(ignore_revs_url) response.raise_for_status() @@ -174,45 +172,54 @@ def filter_irrelevant_commits( if re.search(r"Bug \d+", line, re.IGNORECASE) } - for desc, author, node in commit_log_list: - bug_match = re.search(r"(Bug (\d+).*)", desc, re.IGNORECASE) + for commit in commit_log_list: + desc = commit["desc"] + author = commit["author"] + node = commit["node"] + bug_id = commit["bug_id"] + if ( not any( keyword.lower() in desc.lower() for keyword in KEYWORDS_TO_REMOVE ) - and bug_match + and bug_id and re.search(r"\br=[^\s,]+", desc) and author != "Mozilla Releng Treescript " and node not in hashes_to_ignore ): - bug_id = int(bug_match.group(2)) - bug_component = self.bug_id_to_component.get(bug_id) if bug_component and any( to_ignore in bug_component for to_ignore in PRODUCT_OR_COMPONENT_TO_IGNORE ): continue - yield bug_match.group(1) + yield desc def get_commit_logs( self, preceding_version: str, target_version: str - ) -> Optional[list[tuple[str, str, str]]]: + ) -> Optional[list[dict]]: url = f"https://hg.mozilla.org/releases/mozilla-release/json-pushes?fromchange={preceding_version}&tochange={target_version}&full=1" response = requests.get(url) response.raise_for_status() data = response.json() - commit_log_list = [ - ( - changeset["desc"].strip(), - changeset.get("author", "").strip(), - changeset.get("node", "").strip(), - ) - for push_data in data.values() - for changeset in push_data["changesets"] - if "desc" in changeset and changeset["desc"].strip() - ] + commit_log_list = [] + for push_data in data.values(): + for changeset in push_data["changesets"]: + if "desc" in changeset and changeset["desc"].strip(): + desc = changeset["desc"].strip() + author = changeset.get("author", "").strip() + node = changeset.get("node", "").strip() + match = re.search(r"Bug (\d+)", desc, re.IGNORECASE) + bug_id = int(match.group(1)) if match else None + commit_log_list.append( + { + "desc": desc, + "author": author, + "node": node, + "bug_id": bug_id, + } + ) return commit_log_list if commit_log_list else None def remove_duplicate_bugs(self, csv_text: str) -> str: @@ -238,11 +245,7 @@ def get_final_release_notes_commits( if not commit_log_list: return None - bug_ids = [] - for desc, _, _ in commit_log_list: - match = re.search(r"Bug (\d+)", desc, re.IGNORECASE) - if match: - bug_ids.append(int(match.group(1))) + bug_ids = [commit["bug_id"] for commit in commit_log_list if commit["bug_id"]] self.bug_id_to_component = fetch_bug_components(bug_ids) filtered_commits = list(self.filter_irrelevant_commits(commit_log_list)) diff --git a/functions/release_notes/main.py b/functions/release_notes/main.py index 275deb696c..901500ad74 100644 --- a/functions/release_notes/main.py +++ b/functions/release_notes/main.py @@ -24,6 +24,6 @@ def handle_release_notes(request: flask.Request): llm = generative_model_tool.create_openai_llm() tool = ReleaseNotesCommitsSelector(chunk_size=DEFAULT_CHUNK_SIZE, llm=llm) - notes = tool.get_final_release_notes_commits(target_version=version) + commit_list = tool.get_final_release_notes_commits(target_version=version) - return {"commits": notes}, 200 + return {"commits": commit_list}, 200 From 38499c365d2734ffc43dcb5566262f1e8cda0e0d Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 29 Apr 2025 13:31:32 -0400 Subject: [PATCH 69/70] Changed input to have channel and release separately --- bugbug/tools/release_notes.py | 20 ++++++++++++++------ functions/release_notes/main.py | 12 ++++++++---- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index bfbfcc7c25..8ce997433c 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -197,9 +197,14 @@ def filter_irrelevant_commits(self, commit_log_list: list[dict]) -> Iterator[str yield desc def get_commit_logs( - self, preceding_version: str, target_version: str + self, target_release: int, channel: str ) -> Optional[list[dict]]: - url = f"https://hg.mozilla.org/releases/mozilla-release/json-pushes?fromchange={preceding_version}&tochange={target_version}&full=1" + preceding_release = target_release - 1 + + target_version = f"FIREFOX_{channel}_{target_release}_BASE".upper() + preceding_version = f"FIREFOX_{channel}_{preceding_release}_BASE".upper() + + url = f"https://hg.mozilla.org/releases/mozilla-{channel.lower()}/json-pushes?fromchange={preceding_version}&tochange={target_version}&full=1" response = requests.get(url) response.raise_for_status() data = response.json() @@ -236,11 +241,14 @@ def remove_duplicate_bugs(self, csv_text: str) -> str: return "\n".join(unique_lines) def get_final_release_notes_commits( - self, target_version: str + self, target_release: int, channel: str ) -> Optional[list[str]]: - preceding_version = get_previous_version(target_version) - logger.info(f"Generating commit shortlist for: {target_version}") - commit_log_list = self.get_commit_logs(preceding_version, target_version) + logger.info( + f"Generating commit shortlist for release {target_release} in channel {channel}" + ) + commit_log_list = self.get_commit_logs( + target_release=target_release, channel=channel + ) if not commit_log_list: return None diff --git a/functions/release_notes/main.py b/functions/release_notes/main.py index 901500ad74..4468d764a5 100644 --- a/functions/release_notes/main.py +++ b/functions/release_notes/main.py @@ -16,14 +16,18 @@ def handle_release_notes(request: flask.Request): if request.method != "GET": return "Only GET requests are allowed", 405 - version = request.args.get("version") - if not version: - return "Missing 'version' query parameter", 400 + release = request.args.get("release") + channel = request.args.get("channel") + + if not release or not channel: + return "Missing 'release' or 'channel' query parameter", 400 if tool is None: llm = generative_model_tool.create_openai_llm() tool = ReleaseNotesCommitsSelector(chunk_size=DEFAULT_CHUNK_SIZE, llm=llm) - commit_list = tool.get_final_release_notes_commits(target_version=version) + commit_list = tool.get_final_release_notes_commits( + target_release=release, channel=channel + ) return {"commits": commit_list}, 200 From 2187aab49419bde8395d8c82611fc703b96c656e Mon Sep 17 00:00:00 2001 From: Benjamin Mah Date: Tue, 29 Apr 2025 15:24:06 -0400 Subject: [PATCH 70/70] Removed test and function --- bugbug/tools/release_notes.py | 14 -------------- functions/release_notes/main.py | 2 +- tests/test_release_notes.py | 7 ------- 3 files changed, 1 insertion(+), 22 deletions(-) delete mode 100644 tests/test_release_notes.py diff --git a/bugbug/tools/release_notes.py b/bugbug/tools/release_notes.py index 8ce997433c..fa02a18064 100644 --- a/bugbug/tools/release_notes.py +++ b/bugbug/tools/release_notes.py @@ -29,20 +29,6 @@ ] -def get_previous_version(target_version: str) -> str: - match = re.search(r"(\d+)", target_version) - if not match: - raise ValueError("No number found in the version string") - - number = match.group(0) - decremented_number = str(int(number) - 1) - return ( - target_version[: match.start()] - + decremented_number - + target_version[match.end() :] - ) - - def fetch_bug_components(bug_ids: list[int]) -> dict[int, str]: bug_id_to_component = {} diff --git a/functions/release_notes/main.py b/functions/release_notes/main.py index 4468d764a5..b5a5fbfc55 100644 --- a/functions/release_notes/main.py +++ b/functions/release_notes/main.py @@ -16,7 +16,7 @@ def handle_release_notes(request: flask.Request): if request.method != "GET": return "Only GET requests are allowed", 405 - release = request.args.get("release") + release = int(request.args.get("release")) channel = request.args.get("channel") if not release or not channel: diff --git a/tests/test_release_notes.py b/tests/test_release_notes.py deleted file mode 100644 index a89d419192..0000000000 --- a/tests/test_release_notes.py +++ /dev/null @@ -1,7 +0,0 @@ -from bugbug.tools.release_notes import get_previous_version - - -def test_get_previous_version(): - assert get_previous_version("FIREFOX_BETA_135_BASE") == "FIREFOX_BETA_134_BASE" - assert get_previous_version("FIREFOX_NIGHTLY_132") == "FIREFOX_NIGHTLY_131" - assert get_previous_version("FIREFOX_RELEASE_130_2") == "FIREFOX_RELEASE_129_2"