@@ -68,7 +68,7 @@ def __init__(self, chunk_size: int, llm: LLMChain):
68
68
self .bug_id_to_component : dict [int , str ] = {}
69
69
self .llm = llm
70
70
self .summarization_prompt = PromptTemplate (
71
- input_variables = ["input_text " ],
71
+ input_variables = ["commit_list " ],
72
72
template = """You are an expert in writing Firefox release notes. Your task is to analyze a list of commits and identify important user-facing changes. Follow these steps:
73
73
74
74
1. Must Include Only Meaningful Changes:
@@ -98,14 +98,14 @@ def __init__(self, chunk_size: int, llm: LLMChain):
98
98
4. Select Only the Top 10 Commits:
99
99
- If there are more than 10 relevant commits, choose the most impactful ones.
100
100
101
- 5. Input:
102
- Here is the chunk of commit logs you need to focus on:
103
- {input_text}
104
-
105
- 6. Output Requirements:
101
+ 5. Output Requirements:
106
102
- Output must be raw CSV text—no formatting, no extra text.
107
103
- Do not wrap the output in triple backticks (` ``` `) or use markdown formatting.
108
104
- Do not include the words "CSV" or any headers—just the data.
105
+
106
+ 6. Input:
107
+ Here is the list of commits you need to focus on:
108
+ {commit_list}
109
109
""" ,
110
110
)
111
111
@@ -131,14 +131,14 @@ def __init__(self, chunk_size: int, llm: LLMChain):
131
131
- Obscure web compatibility changes that apply only to edge-case websites.
132
132
- Duplicate entries or similar changes that were already listed.
133
133
134
- Here is the list to filter:
135
- {combined_list}
136
-
137
134
Instructions:
138
135
- KEEP THE SAME FORMAT (do not change the structure of entries that remain).
139
136
- REMOVE UNWORTHY ENTRIES ENTIRELY (do not rewrite them—just delete).
140
137
- DO NOT ADD ANY TEXT BEFORE OR AFTER THE LIST.
141
138
- The output must be only the cleaned-up list, formatted exactly the same way.
139
+
140
+ Here is the list to filter:
141
+ {combined_list}
142
142
""" ,
143
143
)
144
144
@@ -157,13 +157,11 @@ def generate_commit_shortlist(self, commit_log_list: list[str]) -> list[str]:
157
157
commit_log_list_combined = "\n " .join (commit_log_list )
158
158
chunks = self .batch_commit_logs (commit_log_list_combined )
159
159
return [
160
- self .summarization_chain .run ({"input_text " : chunk }).strip ()
160
+ self .summarization_chain .run ({"commit_list " : chunk }).strip ()
161
161
for chunk in chunks
162
162
]
163
163
164
- def filter_irrelevant_commits (
165
- self , commit_log_list : list [tuple [str , str , str ]]
166
- ) -> Iterator [str ]:
164
+ def filter_irrelevant_commits (self , commit_log_list : list [dict ]) -> Iterator [str ]:
167
165
ignore_revs_url = "https://hg.mozilla.org/mozilla-central/raw-file/tip/.hg-annotate-ignore-revs"
168
166
response = requests .get (ignore_revs_url )
169
167
response .raise_for_status ()
@@ -174,45 +172,54 @@ def filter_irrelevant_commits(
174
172
if re .search (r"Bug \d+" , line , re .IGNORECASE )
175
173
}
176
174
177
- for desc , author , node in commit_log_list :
178
- bug_match = re .search (r"(Bug (\d+).*)" , desc , re .IGNORECASE )
175
+ for commit in commit_log_list :
176
+ desc = commit ["desc" ]
177
+ author = commit ["author" ]
178
+ node = commit ["node" ]
179
+ bug_id = commit ["bug_id" ]
180
+
179
181
if (
180
182
not any (
181
183
keyword .lower () in desc .lower () for keyword in KEYWORDS_TO_REMOVE
182
184
)
183
- and bug_match
185
+ and bug_id
184
186
and re .search (r"\br=[^\s,]+" , desc )
185
187
and author
186
188
!= "Mozilla Releng Treescript <release+treescript@mozilla.org>"
187
189
and node not in hashes_to_ignore
188
190
):
189
- bug_id = int (bug_match .group (2 ))
190
-
191
191
bug_component = self .bug_id_to_component .get (bug_id )
192
192
if bug_component and any (
193
193
to_ignore in bug_component
194
194
for to_ignore in PRODUCT_OR_COMPONENT_TO_IGNORE
195
195
):
196
196
continue
197
- yield bug_match . group ( 1 )
197
+ yield desc
198
198
199
199
def get_commit_logs(
    self, preceding_version: str, target_version: str
) -> Optional[list[dict]]:
    """Fetch the changesets pushed between two mozilla-release revisions.

    Queries the ``json-pushes`` endpoint of
    ``hg.mozilla.org/releases/mozilla-release`` with ``full=1`` and
    flattens every push into one list of commit records.

    Args:
        preceding_version: Value sent as the ``fromchange`` query parameter.
        target_version: Value sent as the ``tochange`` query parameter.

    Returns:
        A list of dicts with the keys ``"desc"``, ``"author"``, ``"node"``
        (all stripped strings) and ``"bug_id"`` (the integer following the
        first case-insensitive ``Bug <digits>`` in the description, or
        ``None`` when the description names no bug). Changesets whose
        description is missing or blank are dropped. Returns ``None``
        instead of an empty list when nothing is left.

    Raises:
        Whatever ``requests.get`` raises on connection failure or timeout,
        and whatever ``response.raise_for_status()`` raises for an error
        status code.
    """
    # The query string must not contain whitespace around the "&"
    # separators, otherwise the revision identifiers sent to the server
    # would carry a trailing space.
    url = (
        "https://hg.mozilla.org/releases/mozilla-release/json-pushes"
        f"?fromchange={preceding_version}&tochange={target_version}&full=1"
    )
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    data = response.json()

    commit_log_list = []
    for push_data in data.values():
        for changeset in push_data["changesets"]:
            desc = changeset.get("desc", "").strip()
            if not desc:
                # Nothing to analyse without a commit message.
                continue

            match = re.search(r"Bug (\d+)", desc, re.IGNORECASE)
            commit_log_list.append(
                {
                    "desc": desc,
                    "author": changeset.get("author", "").strip(),
                    "node": changeset.get("node", "").strip(),
                    "bug_id": int(match.group(1)) if match else None,
                }
            )

    # Callers test the result for truthiness; keep the None-on-empty contract.
    return commit_log_list if commit_log_list else None
217
224
218
225
def remove_duplicate_bugs (self , csv_text : str ) -> str :
@@ -238,11 +245,7 @@ def get_final_release_notes_commits(
238
245
if not commit_log_list :
239
246
return None
240
247
241
- bug_ids = []
242
- for desc , _ , _ in commit_log_list :
243
- match = re .search (r"Bug (\d+)" , desc , re .IGNORECASE )
244
- if match :
245
- bug_ids .append (int (match .group (1 )))
248
+ bug_ids = [commit ["bug_id" ] for commit in commit_log_list if commit ["bug_id" ]]
246
249
247
250
self .bug_id_to_component = fetch_bug_components (bug_ids )
248
251
filtered_commits = list (self .filter_irrelevant_commits (commit_log_list ))
0 commit comments