fetch_release_logs.py
"""
This script will automatically fetch the latest release logs from the
OSS release testing pipeline on buildkite.
Specifically, this will loop through all release test pipeline builds for the
specified Ray version and fetch the latest available results from the respective
tests. It will then write these to the directory in `ray/release/release_logs`.
To use this script, either set the BUILDKITE_TOKEN environment variable to a
valid Buildkite API token with read access, or authenticate in AWS with the
OSS CI account.
Usage:
python fetch_release_logs.py <version>
Example:
python fetch_release_logs 1.13.0rc0
Results in:
Fetched microbenchmark.json for commit 025e4b01822214e03907db0b09f3af17203a6671
...
Writing 1.13.0rc0/microbenchmark.json
...
"""
import json
import os
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional

import boto3
import click
from pybuildkite.buildkite import Buildkite

BUILDKITE_ORGANIZATION = "ray-project"
BUILDKITE_PIPELINE = "release"

# Format: job name regex --> filename to save results to
RESULTS_TO_FETCH = {
r"^microbenchmark.aws \(.+\)$": "microbenchmark.json",
r"^many_actors.aws \(.+\)$": "benchmarks/many_actors.json",
r"^many_nodes.aws \(.+\)$": "benchmarks/many_nodes.json",
r"^many_pgs.aws \(.+\)$": "benchmarks/many_pgs.json",
r"^many_tasks.aws \(.+\)$": "benchmarks/many_tasks.json",
r"^object_store.aws \(.+\)$": "scalability/object_store.json",
r"^single_node.aws \(.+\)$": "scalability/single_node.json",
r"^stress_test_dead_actors.aws \(.+\)$": (
"stress_tests/stress_test_dead_actors.json"
),
r"^stress_test_many_tasks.aws \(.+\)$": "stress_tests/stress_test_many_tasks.json",
r"^stress_test_placement_group.aws \(.+\)$": (
"stress_tests/stress_test_placement_group.json"
),
}


@dataclass
class Build:
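    """A Buildkite build, reduced to the fields this script uses."""
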
id: str
number: int
commit: str
job_dict_list: List[Dict]
pipeline: str = BUILDKITE_PIPELINE
    organization: str = BUILDKITE_ORGANIZATION


@dataclass
class Job:
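    """A single job within a Buildkite build."""
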
build: Build
id: str
    name: Optional[str]


@dataclass
class Artifact:
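    """An artifact uploaded by a job."""
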
job: Job
    id: str


def get_buildkite_api() -> Buildkite:
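    """Return a Buildkite API client authenticated with an access token."""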
bk = Buildkite()
buildkite_token = maybe_fetch_buildkite_token()
bk.set_access_token(buildkite_token)
    return bk


def maybe_fetch_buildkite_token() -> str:
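    """Return a Buildkite API token from the environment or AWS Secrets Manager."""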
buildkite_token = os.environ.get("BUILDKITE_TOKEN", None)
if buildkite_token:
return buildkite_token
print("Missing BUILDKITE_TOKEN, retrieving from AWS secrets store")
    secrets_client = boto3.client("secretsmanager", region_name="us-west-2")
    buildkite_token = secrets_client.get_secret_value(
        SecretId="arn:aws:secretsmanager:us-west-2:029272617770:secret:"
        "buildkite/ro-token"
    )["SecretString"]
    os.environ["BUILDKITE_TOKEN"] = buildkite_token
    return buildkite_token


def get_results_from_build_collection(
bk: Buildkite, build_dict_list: List[Dict]
) -> Dict[str, Dict]:
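    """Walk builds from newest to oldest, keeping the first result found per test."""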
results_to_fetch = RESULTS_TO_FETCH.copy()
fetched_results = {}
for build_dict in sorted(build_dict_list, key=lambda bd: -bd["number"]):
if not results_to_fetch:
break
build = Build(
id=build_dict["id"],
number=build_dict["number"],
commit=build_dict["commit"],
job_dict_list=build_dict["jobs"],
)
build_results = get_results_from_build(bk, build, results_to_fetch)
fetched_results.update(build_results)
    return fetched_results


def get_results_from_build(
    bk: Buildkite, build: Build, results_to_fetch: Dict
) -> Dict:
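    """Fetch results from jobs in this build that match a requested test pattern."""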
fetched_results = {}
for job_dict in build.job_dict_list:
if not results_to_fetch:
break
job = Job(build=build, id=job_dict["id"], name=job_dict.get("name", None))
if not job.name:
continue
for job_regex, filename in list(results_to_fetch.items()):
if re.match(job_regex, job.name):
result = get_results_artifact_for_job(bk, job=job)
if not result:
continue
fetched_results[filename] = result
results_to_fetch.pop(job_regex)
print(f"Fetched {filename} for commit {job.build.commit}")
    return fetched_results


def get_results_artifact_for_job(bk: Buildkite, job: Job) -> Optional[Dict]:
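    """Return parsed results from the job's result.json artifact, if present."""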
artifacts = bk.artifacts().list_artifacts_for_job(
organization=job.build.organization,
pipeline=job.build.pipeline,
build=job.build.number,
job=job.id,
)
for artifact in artifacts:
if "result.json" in artifact["filename"]:
artifact = Artifact(job=job, id=artifact["id"])
return download_results_artifact(bk=bk, artifact=artifact)
    return None


def download_results_artifact(bk: Buildkite, artifact: Artifact) -> Dict:
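    """Download an artifact and return the "results" field of its JSON payload."""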
blob = bk.artifacts().download_artifact(
organization=artifact.job.build.organization,
pipeline=artifact.job.build.pipeline,
build=artifact.job.build.number,
job=artifact.job.id,
artifact=artifact.id,
)
data_dict = json.loads(blob)
    return data_dict.get("results", {})


def write_results(log_dir: Path, fetched_results: Dict[str, Any]) -> None:
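    """Write each fetched result to its log file, creating directories as needed."""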
log_dir.mkdir(parents=True, exist_ok=True)
for filepath, content in fetched_results.items():
path = log_dir.joinpath(filepath)
path.parent.mkdir(parents=True, exist_ok=True)
print(f"Writing {path}")
with open(path, "w") as fp:
json.dump(content, fp, sort_keys=True, indent=4)
fp.write("\n")
@click.command()
@click.argument("version", required=True)
@click.argument("commit", required=True)
@click.argument("branch", required=True)
def main(version: str, commit: str, branch: str):
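    """Fetch release test results for VERSION at COMMIT on BRANCH and write
    them to a `<version>` directory next to this script.
    """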
log_dir = Path(__file__).parent.joinpath(version)
bk = get_buildkite_api()
build_dict_list = bk.builds().list_all_for_pipeline(
organization=BUILDKITE_ORGANIZATION,
pipeline=BUILDKITE_PIPELINE,
branch=branch,
commit=commit,
)
fetched_results = get_results_from_build_collection(bk, build_dict_list)
    write_results(log_dir, fetched_results)


if __name__ == "__main__":
main()