@@ -68,7 +68,7 @@ def __init__(self, chunk_size: int, llm: LLMChain):
6868 self .bug_id_to_component : dict [int , str ] = {}
6969 self .llm = llm
7070 self .summarization_prompt = PromptTemplate (
71- input_variables = ["input_text " ],
71+ input_variables = ["commit_list " ],
7272 template = """You are an expert in writing Firefox release notes. Your task is to analyze a list of commits and identify important user-facing changes. Follow these steps:
7373
74741. Must Include Only Meaningful Changes:
@@ -98,14 +98,14 @@ def __init__(self, chunk_size: int, llm: LLMChain):
98984. Select Only the Top 10 Commits:
9999 - If there are more than 10 relevant commits, choose the most impactful ones.
100100
101- 5. Input:
102- Here is the chunk of commit logs you need to focus on:
103- {input_text}
104-
105- 6. Output Requirements:
101+ 5. Output Requirements:
106102 - Output must be raw CSV text—no formatting, no extra text.
107103 - Do not wrap the output in triple backticks (` ``` `) or use markdown formatting.
108104 - Do not include the words "CSV" or any headers—just the data.
105+
106+ 6. Input:
107+ Here is the list of commits you need to focus on:
108+ {commit_list}
109109""" ,
110110 )
111111
@@ -131,14 +131,14 @@ def __init__(self, chunk_size: int, llm: LLMChain):
131131- Obscure web compatibility changes that apply only to edge-case websites.
132132- Duplicate entries or similar changes that were already listed.
133133
134- Here is the list to filter:
135- {combined_list}
136-
137134Instructions:
138135- KEEP THE SAME FORMAT (do not change the structure of entries that remain).
139136- REMOVE UNWORTHY ENTRIES ENTIRELY (do not rewrite them—just delete).
140137- DO NOT ADD ANY TEXT BEFORE OR AFTER THE LIST.
141138- The output must be only the cleaned-up list, formatted exactly the same way.
139+
140+ Here is the list to filter:
141+ {combined_list}
142142""" ,
143143 )
144144
@@ -157,13 +157,11 @@ def generate_commit_shortlist(self, commit_log_list: list[str]) -> list[str]:
157157 commit_log_list_combined = "\n " .join (commit_log_list )
158158 chunks = self .batch_commit_logs (commit_log_list_combined )
159159 return [
160- self .summarization_chain .run ({"input_text " : chunk }).strip ()
160+ self .summarization_chain .run ({"commit_list " : chunk }).strip ()
161161 for chunk in chunks
162162 ]
163163
164- def filter_irrelevant_commits (
165- self , commit_log_list : list [tuple [str , str , str ]]
166- ) -> Iterator [str ]:
164+ def filter_irrelevant_commits (self , commit_log_list : list [dict ]) -> Iterator [str ]:
167165 ignore_revs_url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/.hg-annotate-ignore-revs"
168166 response = requests .get (ignore_revs_url )
169167 response .raise_for_status ()
@@ -174,45 +172,54 @@ def filter_irrelevant_commits(
174172 if re .search (r"Bug \d+" , line , re .IGNORECASE )
175173 }
176174
177- for desc , author , node in commit_log_list :
178- bug_match = re .search (r"(Bug (\d+).*)" , desc , re .IGNORECASE )
175+ for commit in commit_log_list :
176+ desc = commit ["desc" ]
177+ author = commit ["author" ]
178+ node = commit ["node" ]
179+ bug_id = commit ["bug_id" ]
180+
179181 if (
180182 not any (
181183 keyword .lower () in desc .lower () for keyword in KEYWORDS_TO_REMOVE
182184 )
183- and bug_match
185+ and bug_id
184186 and re .search (r"\br=[^\s,]+" , desc )
185187 and author
186188 != "Mozilla Releng Treescript <release+treescript@mozilla.org>"
187189 and node not in hashes_to_ignore
188190 ):
189- bug_id = int (bug_match .group (2 ))
190-
191191 bug_component = self .bug_id_to_component .get (bug_id )
192192 if bug_component and any (
193193 to_ignore in bug_component
194194 for to_ignore in PRODUCT_OR_COMPONENT_TO_IGNORE
195195 ):
196196 continue
197- yield bug_match . group ( 1 )
197+ yield desc
198198
199199 def get_commit_logs (
200200 self , preceding_version : str , target_version : str
201- ) -> Optional [list [tuple [ str , str , str ] ]]:
201+ ) -> Optional [list [dict ]]:
202202 url = f"https://hg.mozilla.org/releases/mozilla-release/json-pushes?fromchange={ preceding_version } &tochange={ target_version } &full=1"
203203 response = requests .get (url )
204204 response .raise_for_status ()
205205 data = response .json ()
206- commit_log_list = [
207- (
208- changeset ["desc" ].strip (),
209- changeset .get ("author" , "" ).strip (),
210- changeset .get ("node" , "" ).strip (),
211- )
212- for push_data in data .values ()
213- for changeset in push_data ["changesets" ]
214- if "desc" in changeset and changeset ["desc" ].strip ()
215- ]
206+ commit_log_list = []
207+ for push_data in data .values ():
208+ for changeset in push_data ["changesets" ]:
209+ if "desc" in changeset and changeset ["desc" ].strip ():
210+ desc = changeset ["desc" ].strip ()
211+ author = changeset .get ("author" , "" ).strip ()
212+ node = changeset .get ("node" , "" ).strip ()
213+ match = re .search (r"Bug (\d+)" , desc , re .IGNORECASE )
214+ bug_id = int (match .group (1 )) if match else None
215+ commit_log_list .append (
216+ {
217+ "desc" : desc ,
218+ "author" : author ,
219+ "node" : node ,
220+ "bug_id" : bug_id ,
221+ }
222+ )
216223 return commit_log_list if commit_log_list else None
217224
218225 def remove_duplicate_bugs (self , csv_text : str ) -> str :
@@ -238,11 +245,7 @@ def get_final_release_notes_commits(
238245 if not commit_log_list :
239246 return None
240247
241- bug_ids = []
242- for desc , _ , _ in commit_log_list :
243- match = re .search (r"Bug (\d+)" , desc , re .IGNORECASE )
244- if match :
245- bug_ids .append (int (match .group (1 )))
248+ bug_ids = [commit ["bug_id" ] for commit in commit_log_list if commit ["bug_id" ]]
246249
247250 self .bug_id_to_component = fetch_bug_components (bug_ids )
248251 filtered_commits = list (self .filter_irrelevant_commits (commit_log_list ))
0 commit comments