From a4384d9d48d731e40cbaf035cc9862b534fe3737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 14 Jul 2023 04:55:14 +0200 Subject: [PATCH] GH-36634: [Dev] Ensure merge script goes over all pages when requesting info from GitHub (#36637) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change We were missing maintenance branches because the GitHub API paginates its responses and only the first page was being read. ### What changes are included in this PR? Check whether the API response is paginated and, if so, follow the next-page links and extend the returned list with the results. ### Are these changes tested? I have tested locally: ``` (Pdb) pr.maintenance_branches ['maint-0.11.x', 'maint-0.12.x', 'maint-0.14.x', 'maint-0.15.x', 'maint-0.17.x', 'maint-1.0.x', 'maint-3.0.x', 'maint-4.0.x', 'maint-6.0.x', 'maint-7.0.x', 'maint-7.0.1', 'maint-8.0.x', 'maint-9.0.0', 'maint-10.0.x', 'maint-10.0.0', 'maint-10.0.1', 'maint-11.0.0', 'maint-12.0.x', 'maint-12.0.0', 'maint-12.0.1', 'maint-13.0.0'] (Pdb) c Enter fix version [14.0.0]: ``` ### Are there any user-facing changes? No * Closes: #36634 Lead-authored-by: Raúl Cumplido Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- dev/merge_arrow_pr.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index 90b2e9b034eea..0f36a5ba9025c 100755 --- a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -78,7 +78,24 @@ def get_json(url, headers=None): response = requests.get(url, headers=headers) if response.status_code != 200: raise ValueError(response.json()) - return response.json() + # GitHub returns a link header with the next, previous, last + # page if there is pagination on the response. 
See: + # https://docs.github.com/en/rest/guides/using-pagination-in-the-rest-api#using-link-headers + next_responses = None + if "link" in response.headers: + links = response.headers['link'].split(', ') + for link in links: + if 'rel="next"' in link: + # Format: '<url>; rel="next"' + next_url = link.split(";")[0][1:-1] + next_responses = get_json(next_url, headers) + responses = response.json() + if next_responses: + if isinstance(responses, list): + responses.extend(next_responses) + else: + raise ValueError('GitHub response was paginated and is not a list') + return responses def run_cmd(cmd):