Skip to content

Commit

Permalink
Size reports: Improved comments (#10982)
Browse files Browse the repository at this point in the history
#### Problem

It's hard to pick out actual size changes, beyond those called out in
the large-increase tables.

Report information can be distributed across multiple collapsed tables,
since the generator often runs before all builds are complete.

#### Change overview

Read back any existing comment and merge in new reports.

Comments now contain:

1. An open table highlighting large increases, if any.
2. A collapsed table of all increases, if any.
3. A collapsed table of all decreases, if any.
4. A collapsed table of all data.

#### Testing

Tested with manual runs:
#10979 (comment)
  • Loading branch information
kpschoedel authored and pull[bot] committed Nov 3, 2021
1 parent ce83816 commit 2905106
Show file tree
Hide file tree
Showing 2 changed files with 170 additions and 64 deletions.
218 changes: 160 additions & 58 deletions scripts/tools/memory/gh_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import logging
import os
import os.path
import re
import sqlite3
import sys
import zipfile
Expand Down Expand Up @@ -101,6 +102,14 @@
'type': int,
},
},
'github.limit-pr': {
'help': 'Report only on PR, if present.',
'metavar': 'PR',
'default': 0,
'argparse': {
'type': int,
},
},
Config.group_map('report'): {
'group': 'output'
},
Expand Down Expand Up @@ -194,8 +203,11 @@ def add_sizes(self, **kwargs):
bd = {k: kwargs[k] for k in ('hash', 'parent', 'time')}
cd = {k: kwargs.get(k, 0) for k in ('pr', 'artifact', 'commented')}
build = self.store_and_return_id('build', thing_id=thing, **bd, **cd)
for d in kwargs['sizes']:
self.store('size', build_id=build, **d)
if build is None:
logging.error('Failed to store %s %s %s', thing, bd, cd)
else:
for d in kwargs['sizes']:
self.store('size', build_id=build, **d)

def add_sizes_from_json(self, s: Union[bytes, str], origin: Dict):
"""Add sizes from a JSON size report."""
Expand Down Expand Up @@ -331,9 +343,14 @@ def delete_stale_builds(self, build_ids: Iterable[int]):
self.commit()

def delete_artifact(self, artifact_id: int):
if self.gh and artifact_id and artifact_id not in self.deleted_artifacts:
if (self.gh and artifact_id
and artifact_id not in self.deleted_artifacts):
self.deleted_artifacts.add(artifact_id)
self.gh.actions.delete_artifact(artifact_id)
try:
self.gh.actions.delete_artifact(artifact_id)
except Exception:
# During manual testing we sometimes lose the race against CI.
logging.error('Failed to delete artifact %d', artifact_id)

def delete_stale_artifacts(self, stale_artifacts: Iterable[int]):
if not self.config['github.keep']:
Expand Down Expand Up @@ -454,11 +471,16 @@ def changes_for_commit(db: SizeDatabase, pr: int, commit: str,
return df


def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame,
tdf: pd.DataFrame) -> bool:
comment_format_re = re.compile(r"^<!--ghr-comment-format:(\d+)-->")


def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame) -> bool:
"""Send a change report as a github comment."""

if not db.gh:
return False

# Look for an existing comment for this change.
pr = df.attrs['pr']

# Check the most recent commit on the PR, so that we don't comment on
Expand All @@ -475,61 +497,147 @@ def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame,
# Check for an existing size report comment. If one exists, we'll add
# the new report to it.
title = df.attrs['title']
existing_comment_id = 0
existing_comment = None
existing_comment_format = 0
for comment in gh_get_comments_for_pr(db.gh, pr):
if comment.body.partition('\n')[0] == df.attrs['title']:
existing_comment_id = comment.id
title = comment.body
comment_parts = comment.body.partition('\n')
if comment_parts[0].strip() == title:
existing_comment = comment
if m := comment_format_re.match(comment_parts[2]):
existing_comment_format = int(m.group(1))
break

md = io.StringIO()
md.write(title)
md.write('\n')

if tdf is not None and not tdf.empty:
md.write(f'\n**{tdf.attrs["title"]}:**\n\n')
memdf.report.write_df(db.config,
tdf,
md,
'pipe',
hierify=True,
title=False,
tabulate={'floatfmt': '5.1f'})

count = len(df.attrs['things'])
platforms = ', '.join(sorted(list(set(df['platform']))))
summary = f'{count} build{"" if count == 1 else "s"} (for {platforms})'
md.write(f'\n<details>\n<summary>{summary}</summary>\n\n')
memdf.report.write_df(db.config,
df,
md,
'pipe',
hierify=True,
title=False,
tabulate={'floatfmt': '5.1f'})
md.write('\n</details>\n')
text = md.getvalue()
md.close()
if existing_comment_format != 1:
existing_comment = None
text = gh_comment_v1(db, df, existing_comment)

if existing_comment_id:
comment = f'updating comment {existing_comment_id}'
else:
comment = 'as new comment'
logging.info('SCR: %s for %s %s', df.attrs['title'], summary, comment)
logging.info(
'SCR: %s %s', df.attrs['title'],
f'updating comment {existing_comment.id}'
if existing_comment else 'as new comment')

if db.config['github.dryrun-comment']:
logging.debug('%s', text)
return False

try:
if existing_comment_id:
db.gh.issues.update_comment(existing_comment_id, text)
if existing_comment:
db.gh.issues.update_comment(existing_comment.id, text)
else:
db.gh.issues.create_comment(pr, text)
return True
except Exception:
return False


def gh_comment_v1(db: SizeDatabase, df: pd.DataFrame, existing_comment) -> str:
"""Format a github comment."""

if existing_comment:
df = v1_comment_merge(df, existing_comment)

threshold_df = None
increase_df = df[df['change'] > 0]
if increase_df.empty:
increase_df = None
elif threshold := db.config['report.increases']:
threshold_df = df[df['% change'] > threshold]
if threshold_df.empty:
threshold_df = None
decrease_df = df[df['change'] < 0]
if decrease_df.empty:
decrease_df = None

with io.StringIO() as md:
md.write(df.attrs['title'])
md.write('\n<!--ghr-comment-format:1-->\n\n')

if threshold_df is not None:
md.write(f'**Increases above {threshold:.2g}%:**\n\n')
md.write('<!--ghr-report:threshold-->\n\n')
v1_comment_write_df(db, threshold_df, md)

if increase_df is not None:
summary = v1_comment_summary(increase_df)
md.write('<details>\n')
md.write(f'<summary>Increases ({summary})</summary>\n')
md.write('<!--ghr-report:increases-->\n\n')
v1_comment_write_df(db, increase_df, md)
md.write('</details>\n\n')

if decrease_df is not None:
summary = v1_comment_summary(decrease_df)
md.write('<details>\n')
md.write(f'<summary>Decreases ({summary})</summary>\n')
md.write('<!--ghr-report:decreases-->\n\n')
v1_comment_write_df(db, decrease_df, md)
md.write('</details>\n\n')

summary = v1_comment_summary(df)
md.write('<details>\n')
md.write(f'<summary>Full report ({summary})</summary>\n')
md.write('<!--ghr-report:full-->\n\n')
v1_comment_write_df(db, df, md)
md.write('\n</details>\n')

return md.getvalue()


def v1_comment_merge(df: pd.DataFrame, comment) -> pd.DataFrame:
with io.StringIO(comment.body) as body:
for line in body:
if line.startswith('<!--ghr-report:full-->'):
body.readline() # Blank line before table.
header, rows = read_hierified(body)
break
logging.debug('REC: read %d rows', len(rows))
df = df.append(pd.DataFrame(data=rows, columns=header).astype(df.dtypes))
return df.sort_values(
by=['platform', 'target', 'config', 'section']).drop_duplicates()


def read_hierified(f):
"""Read a markdown table in ‘hierified’ format."""

line = f.readline()
header = tuple((s.strip() for s in line.split('|')[1:-1]))

_ = f.readline() # The line under the header.

rows = []
for line in f:
line = line.strip()
if not line:
break
row = []
columns = line.split('|')
for i in range(0, len(header)):
column = columns[i + 1].strip()
if not column:
column = rows[-1][i]
row.append(column)
rows.append(tuple(row))

return (header, rows)


def v1_comment_write_df(db: SizeDatabase, df: pd.DataFrame,
out: memdf.report.OutputOption):
memdf.report.write_df(db.config,
df,
out,
'pipe',
hierify=True,
title=False,
tabulate={'floatfmt': '5.1f'})


def v1_comment_summary(df: pd.DataFrame) -> str:
count = df[['platform', 'target', 'config']].drop_duplicates().shape[0]
platforms = ', '.join(sorted(list(set(df['platform']))))
return f'{count} build{"" if count == 1 else "s"} for {platforms}'


def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]:
"""Report on all new comparable commits."""
if not (db.config['report.pr'] or db.config['report.push']):
Expand All @@ -542,30 +650,24 @@ def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]:

report_push = db.config['report.push']
report_pr = db.config['report.pr']
only_pr = db.config['github.limit-pr']

dfs = {}
for pr, commit, parent in db.select_matching_commits().fetchall():
if not (report_pr if pr else report_push):
continue

# Github doesn't have a way to fetch artifacts associated with a
# particular PR. For testing purposes, filter to a single PR here.
if only_pr and pr != only_pr:
continue

df = changes_for_commit(db, pr, commit, parent)
dfs[df.attrs['name']] = df

if threshold := db.config['report.increases']:
tdf = df[df['% change'] > threshold]
else:
tdf = None
if tdf is not None and not tdf.empty:
commit = df.attrs['commit']
parent = df.attrs['parent']
tdf.attrs['name'] = f'L,{commit},{parent}'
tdf.attrs['title'] = (
f'Increases above {threshold:.1f}% from {parent} to {commit}')
dfs[tdf.attrs['name']] = tdf

if (pr and comment_enabled
and (comment_limit == 0 or comment_limit > comment_count)):
if gh_send_change_report(db, df, tdf):
if gh_send_change_report(db, df):
# Mark the originating builds, and remove the originating
# artifacts, so that they don't generate duplicate report
# comments.
Expand Down
16 changes: 10 additions & 6 deletions scripts/tools/memory/memdf/util/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,18 @@ def store(self, table: str, **kwargs):
self.connection().execute(q, v)

def get_matching(self, table: str, columns: List[str], **kwargs):
return self.connection().execute(
f"SELECT {','.join(columns)} FROM {table}"
f" WHERE {'=? AND '.join(kwargs.keys())}=?",
list(kwargs.values()))
q = (f"SELECT {','.join(columns)} FROM {table}"
f" WHERE {'=? AND '.join(kwargs.keys())}=?")
v = list(kwargs.values())
return self.connection().execute(q, v)

def get_matching_id(self, table: str, **kwargs):
return self.get_matching(table, ['id'], **kwargs).fetchone()[0]
cur = self.get_matching(table, ['id'], **kwargs)
row = cur.fetchone()
if row:
return row[0]
return None

def store_and_return_id(self, table: str, **kwargs) -> int:
def store_and_return_id(self, table: str, **kwargs) -> Optional[int]:
self.store(table, **kwargs)
return self.get_matching_id(table, **kwargs)

0 comments on commit 2905106

Please sign in to comment.