Show actual Job URL on test report #6837

Merged (2 commits) on Aug 8, 2022

3 changes: 2 additions & 1 deletion .gitignore
@@ -33,7 +33,8 @@ tags
.mypy_cache/

reports/
test_report.*
test_report*.html
test_report*.db
test_short_report.html

# Test failures will dump the cluster state in here
148 changes: 101 additions & 47 deletions continuous_integration/scripts/test_report.py
@@ -9,7 +9,7 @@
import sys
import zipfile
from collections.abc import Iterator
from typing import Any
from typing import Any, Iterable, cast

import altair
import altair_saver
@@ -102,6 +102,39 @@ def maybe_get_next_page_path(response: requests.Response) -> str | None:
return next_page_path


def get_jobs(workflow):
with shelve.open("test_report_jobs") as cache:
url = workflow["jobs_url"]
try:
jobs = cache[url]
except KeyError:
params = {"per_page": 100}
r = get_from_github(workflow["jobs_url"], params)
jobs = r.json()["jobs"]
while next_page := maybe_get_next_page_path(r):
r = get_from_github(next_page, params=params)
jobs.extend(r.json()["jobs"])
cache[url] = jobs

df_jobs = pandas.DataFrame.from_records(jobs)
extracted = df_jobs.name.str.extract(
r"\(([\w\-]+), (\d\.\d+),\s([\w|\s]+)\)"
).dropna()
df_jobs["OS"] = extracted[0]
df_jobs["python_version"] = extracted[1]
df_jobs["partition"] = extracted[2]
# We later need to join on this. The job ID is not part of the workflow schema, so this composite name is the only key we can join on.
df_jobs["suite_name"] = (
df_jobs["OS"]
+ "-"
+ df_jobs["python_version"]
+ "-"
+ df_jobs["partition"].str.replace(" ", "")
)

return df_jobs
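
The get_jobs helper above builds the composite suite_name that the rest of the script uses to match artifacts to jobs. Below is a minimal, standalone sketch of that construction; the job names and URLs are made-up examples shaped like "test (ubuntu-latest, 3.9, not ci1)", which is an assumption about how the CI jobs are named.

import pandas

# Hypothetical job records; real ones come from the GitHub Actions jobs API.
jobs = pandas.DataFrame.from_records(
    [
        {"name": "test (ubuntu-latest, 3.9, not ci1)", "html_url": "https://github.com/org/repo/runs/1"},
        {"name": "test (windows-latest, 3.10, ci1)", "html_url": "https://github.com/org/repo/runs/2"},
    ]
)
extracted = jobs["name"].str.extract(r"\(([\w\-]+), (\d\.\d+),\s([\w|\s]+)\)").dropna()
jobs["suite_name"] = extracted[0] + "-" + extracted[1] + "-" + extracted[2].str.replace(" ", "")
print(jobs["suite_name"].tolist())  # ['ubuntu-latest-3.9-notci1', 'windows-latest-3.10-ci1']

Stripping the space from the partition is what lines the result up with the artifact names that download_and_parse_artifacts later asserts against.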


def get_workflow_listing(repo: str, branch: str, event: str, days: int) -> list[dict]:
"""
Get a list of workflow runs from GitHub actions.
@@ -153,26 +186,32 @@ def download_and_parse_artifact(url: str) -> junitparser.JUnitXml | None:
"""
Download the artifact at the url and parse it.
"""
with shelve.open("test_report") as cache:
try:
xml_raw = cache[url]
except KeyError:
r = get_from_github(url, params={})
f = zipfile.ZipFile(io.BytesIO(r.content))
cache[url] = xml_raw = f.read(f.filelist[0].filename)
try:
r = get_from_github(url, params={})
f = zipfile.ZipFile(io.BytesIO(r.content))
run = junitparser.JUnitXml.fromstring(f.read(f.filelist[0].filename))
return run
return junitparser.JUnitXml.fromstring(xml_raw)
except Exception:
print(f"Failed to download/parse {url}")
# Downloaded artifacts also include things like the schedule, which is plain JSON rather than JUnit XML
return None
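
download_and_parse_artifact above now caches the raw XML bytes in a shelve database keyed by artifact URL, so repeated runs skip the download. The following is a minimal sketch of that caching pattern only; fetch_artifact_bytes is a hypothetical stand-in for the real download-and-unzip step, not part of the PR.

import shelve


def fetch_artifact_bytes(url: str) -> bytes:
    # Hypothetical stand-in for downloading the artifact zip and reading its first member.
    raise NotImplementedError


def cached_artifact_bytes(url: str, cache_path: str = "test_report") -> bytes:
    with shelve.open(cache_path) as cache:
        try:
            return cache[url]  # cache hit: no network round trip
        except KeyError:
            cache[url] = data = fetch_artifact_bytes(url)
            return data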


def dataframe_from_jxml(run: list) -> pandas.DataFrame:
def dataframe_from_jxml(run: Iterable) -> pandas.DataFrame:
"""
Turn a parsed JXML into a pandas dataframe
"""
fname = []
tname = []
status = []
message = []
sname = []
for suite in run:
for test in suite:
sname.append(suite.name)
fname.append(test.classname)
tname.append(test.name)
s = "✓"
@@ -195,7 +234,13 @@ def dataframe_from_jxml(run: list) -> pandas.DataFrame:
status.append(s)
message.append(html.escape(m))
df = pandas.DataFrame(
{"file": fname, "test": tname, "status": status, "message": message}
{
"file": fname,
"test": tname,
"status": status,
"message": message,
"suite_name": sname,
}
)

# There are sometimes duplicate tests in the report for some unknown reason.
@@ -212,7 +257,7 @@ def dedup(group):
else:
return group

return df.groupby(["file", "test"]).agg(dedup)
return df.groupby(["file", "test"], as_index=False).agg(dedup)
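
dataframe_from_jxml above now records suite.name for every test, which is what later lets the report look up the matching job. A small sketch of where that value comes from when junitparser walks a parsed report; the XML snippet is an invented example of pytest's JUnit output, not taken from a real run.

import junitparser

xml_raw = b"""<testsuites>
  <testsuite name="ubuntu-latest-3.9-notci1" tests="1">
    <testcase classname="distributed.tests.test_utils" name="test_ok" time="0.01"/>
  </testsuite>
</testsuites>"""

run = junitparser.JUnitXml.fromstring(xml_raw)
for suite in run:
    for test in suite:
        print(suite.name, test.classname, test.name)
# ubuntu-latest-3.9-notci1 distributed.tests.test_utils test_ok

The switch to groupby(..., as_index=False) keeps "file" and "test" as ordinary columns rather than index levels, which the column-based selection and groupby in main() below depend on.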


def download_and_parse_artifacts(
@@ -258,40 +303,39 @@ def download_and_parse_artifacts(
ndownloaded = 0
print(f"Downloading and parsing {nartifacts} artifacts...")

cache: shelve.Shelf[pandas.DataFrame | None]
# FIXME https://github.com/python/typeshed/pull/8190
with shelve.open("test_report") as cache: # type: ignore[assignment]
for w in workflows:
w["dfs"] = []
for a in w["artifacts"]:
url = a["archive_download_url"]
df: pandas.DataFrame | None
try:
df = cache[url]
except KeyError:
xml = download_and_parse_artifact(url)
if xml:
df = dataframe_from_jxml(xml)
# Note: we assign a column with the workflow timestamp rather
# than the artifact timestamp so that artifacts triggered under
# the same workflow can be aligned according to the same trigger
# time.
df = df.assign(
name=a["name"],
suite=suite_from_name(a["name"]),
date=w["created_at"],
url=w["html_url"],
)
else:
df = None
cache[url] = df

if df is not None:
yield df

ndownloaded += 1
if ndownloaded and not ndownloaded % 20:
print(f"{ndownloaded}... ", end="")
for w in workflows:
jobs_df = get_jobs(w)
w["dfs"] = []
for a in w["artifacts"]:
url = a["archive_download_url"]
df: pandas.DataFrame | None
xml = download_and_parse_artifact(url)
if xml is None:
continue
df = dataframe_from_jxml(cast(Iterable, xml))
# Note: we assign a column with the workflow timestamp rather
# than the artifact timestamp so that artifacts triggered under
# the same workflow can be aligned according to the same trigger
# time.
html_url = jobs_df[jobs_df["suite_name"] == a["name"]].html_url.unique()
assert (
len(html_url) == 1
), f"Artifact suit name {a['name']} did not match any jobs dataframe {jobs_df['suite_name'].unique()}"
html_url = html_url[0]
assert html_url is not None
df2 = df.assign(
name=a["name"],
suite=suite_from_name(a["name"]),
date=w["created_at"],
html_url=html_url,
)

if df2 is not None:
yield df2

ndownloaded += 1
if ndownloaded and not ndownloaded % 20:
print(f"{ndownloaded}... ", end="")


def main(argv: list[str] | None = None) -> None:
@@ -313,10 +357,20 @@ def main(argv: list[str] | None = None) -> None:
max_workflows=args.max_workflows,
)
)

total = pandas.concat(dfs, axis=0)
# Reduce the size of the DF since the entire thing is encoded in the vega spec
required_columns = [
"test",
"date",
"suite",
"file",
"html_url",
"status",
"message",
]
total = total[required_columns]
grouped = (
total.groupby(total.index)
total.groupby([total.file, total.test])
.filter(lambda g: (g.status == "x").sum() >= args.nfails)
.reset_index()
.assign(test=lambda df: df.file + "." + df.test)
Expand Down Expand Up @@ -356,15 +410,15 @@ def main(argv: list[str] | None = None) -> None:
.encode(
x=altair.X("date:O", scale=altair.Scale(domain=sorted(list(times)))),
y=altair.Y("suite:N", title=None),
href=altair.Href("url:N"),
href=altair.Href("html_url:N"),
color=altair.Color(
"status:N",
scale=altair.Scale(
domain=list(COLORS.keys()),
range=list(COLORS.values()),
),
),
tooltip=["suite:N", "date:O", "status:N", "message:N", "url:N"],
tooltip=["suite:N", "date:O", "status:N", "message:N", "html_url:N"],
)
.properties(title=name)
| altair.Chart(df_agg.assign(_="_"))
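
The chart change above swaps the workflow-level url for the per-job html_url and feeds it to altair's Href channel, which is what makes each cell in the rendered report link to the actual job. Below is a minimal, standalone sketch of that encoding with invented data; it is not the full report chart.

import altair
import pandas

df = pandas.DataFrame(
    {
        "date": ["2022-08-01", "2022-08-02"],
        "suite": ["ubuntu-latest-3.9-notci1"] * 2,
        "status": ["✓", "x"],
        "html_url": [
            "https://github.com/org/repo/actions/runs/1/jobs/10",
            "https://github.com/org/repo/actions/runs/2/jobs/20",
        ],
    }
)

chart = (
    altair.Chart(df)
    .mark_rect()
    .encode(
        x="date:O",
        y="suite:N",
        color="status:N",
        href=altair.Href("html_url:N"),  # clicking a mark opens this URL
        tooltip=["status:N", "html_url:N"],
    )
)
chart.save("href_sketch.html")  # open in a browser; each rectangle is a hyperlink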