Size reports: Improved comments (#10982)

#### Problem It's hard to pick out actual size changes, beyond those called out in the large-increase tables. Report information can be distributed across multiple collapsed tables, since the generator often runs before all builds are complete. #### Change overview Read back any existing comment and merge in new reports. Comments now contain: 1. An open table highlighting large increases, if any. 2. A collapsed table of all increases, if any. 3. A collapsed table of all decreases, if any. 4. A collapsed table of all data. #### Testing Tested with manual runs: #10979 (comment)
project-chip · Nov 3, 2021 · 2905106 · 2905106
1 parent ce83816
commit 2905106
Show file tree

Hide file tree

Showing 2 changed files with 170 additions and 64 deletions.
diff --git a/scripts/tools/memory/gh_report.py b/scripts/tools/memory/gh_report.py
@@ -22,6 +22,7 @@
 import logging
 import os
 import os.path
+import re
 import sqlite3
 import sys
 import zipfile
@@ -101,6 +102,14 @@
  'type': int,
  },
  },
+ 'github.limit-pr': {
+ 'help': 'Report only on PR, if present.',
+ 'metavar': 'PR',
+ 'default': 0,
+ 'argparse': {
+ 'type': int,
+ },
+ },
  Config.group_map('report'): {
  'group': 'output'
  },
@@ -194,8 +203,11 @@ def add_sizes(self, **kwargs):
  bd = {k: kwargs[k] for k in ('hash', 'parent', 'time')}
  cd = {k: kwargs.get(k, 0) for k in ('pr', 'artifact', 'commented')}
  build = self.store_and_return_id('build', thing_id=thing, **bd, **cd)
- for d in kwargs['sizes']:
- self.store('size', build_id=build, **d)
+ if build is None:
+ logging.error('Failed to store %s %s %s', thing, bd, cd)
+ else:
+ for d in kwargs['sizes']:
+ self.store('size', build_id=build, **d)
 
  def add_sizes_from_json(self, s: Union[bytes, str], origin: Dict):
  """Add sizes from a JSON size report."""
@@ -331,9 +343,14 @@ def delete_stale_builds(self, build_ids: Iterable[int]):
  self.commit()
 
  def delete_artifact(self, artifact_id: int):
- if self.gh and artifact_id and artifact_id not in self.deleted_artifacts:
+ if (self.gh and artifact_id
+ and artifact_id not in self.deleted_artifacts):
  self.deleted_artifacts.add(artifact_id)
- self.gh.actions.delete_artifact(artifact_id)
+ try:
+ self.gh.actions.delete_artifact(artifact_id)
+ except Exception:
+ # During manual testing we sometimes lose the race against CI.
+ logging.error('Failed to delete artifact %d', artifact_id)
 
  def delete_stale_artifacts(self, stale_artifacts: Iterable[int]):
  if not self.config['github.keep']:
@@ -454,11 +471,16 @@ def changes_for_commit(db: SizeDatabase, pr: int, commit: str,
  return df
 
 
-def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame,
- tdf: pd.DataFrame) -> bool:
+comment_format_re = re.compile(r"^<!--ghr-comment-format:(\d+)-->")
+
+
+def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame) -> bool:
  """Send a change report as a github comment."""
+
  if not db.gh:
  return False
+
+ # Look for an existing comment for this change.
  pr = df.attrs['pr']
 
  # Check the most recent commit on the PR, so that we don't comment on
@@ -475,61 +497,147 @@ def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame,
  # Check for an existing size report comment. If one exists, we'll add
  # the new report to it.
  title = df.attrs['title']
- existing_comment_id = 0
+ existing_comment = None
+ existing_comment_format = 0
  for comment in gh_get_comments_for_pr(db.gh, pr):
- if comment.body.partition('\n')[0] == df.attrs['title']:
- existing_comment_id = comment.id
- title = comment.body
+ comment_parts = comment.body.partition('\n')
+ if comment_parts[0].strip() == title:
+ existing_comment = comment
+ if m := comment_format_re.match(comment_parts[2]):
+ existing_comment_format = int(m.group(1))
  break
 
- md = io.StringIO()
- md.write(title)
- md.write('\n')
-
- if tdf is not None and not tdf.empty:
- md.write(f'\n**{tdf.attrs["title"]}:**\n\n')
- memdf.report.write_df(db.config,
- tdf,
- md,
- 'pipe',
- hierify=True,
- title=False,
- tabulate={'floatfmt': '5.1f'})
-
- count = len(df.attrs['things'])
- platforms = ', '.join(sorted(list(set(df['platform']))))
- summary = f'{count} build{"" if count == 1 else "s"} (for {platforms})'
- md.write(f'\n<details>\n<summary>{summary}</summary>\n\n')
- memdf.report.write_df(db.config,
- df,
- md,
- 'pipe',
- hierify=True,
- title=False,
- tabulate={'floatfmt': '5.1f'})
- md.write('\n</details>\n')
- text = md.getvalue()
- md.close()
+ if existing_comment_format != 1:
+ existing_comment = None
+ text = gh_comment_v1(db, df, existing_comment)
 
- if existing_comment_id:
- comment = f'updating comment {existing_comment_id}'
- else:
- comment = 'as new comment'
- logging.info('SCR: %s for %s %s', df.attrs['title'], summary, comment)
+ logging.info(
+ 'SCR: %s %s', df.attrs['title'],
+ f'updating comment {existing_comment.id}'
+ if existing_comment else 'as new comment')
 
  if db.config['github.dryrun-comment']:
+ logging.debug('%s', text)
  return False
 
  try:
- if existing_comment_id:
- db.gh.issues.update_comment(existing_comment_id, text)
+ if existing_comment:
+ db.gh.issues.update_comment(existing_comment.id, text)
  else:
  db.gh.issues.create_comment(pr, text)
  return True
  except Exception:
  return False
 
 
+def gh_comment_v1(db: SizeDatabase, df: pd.DataFrame, existing_comment) -> str:
+ """Format a github comment."""
+
+ if existing_comment:
+ df = v1_comment_merge(df, existing_comment)
+
+ threshold_df = None
+ increase_df = df[df['change'] > 0]
+ if increase_df.empty:
+ increase_df = None
+ elif threshold := db.config['report.increases']:
+ threshold_df = df[df['% change'] > threshold]
+ if threshold_df.empty:
+ threshold_df = None
+ decrease_df = df[df['change'] < 0]
+ if decrease_df.empty:
+ decrease_df = None
+
+ with io.StringIO() as md:
+ md.write(df.attrs['title'])
+ md.write('\n<!--ghr-comment-format:1-->\n\n')
+
+ if threshold_df is not None:
+ md.write(f'**Increases above {threshold:.2g}%:**\n\n')
+ md.write('<!--ghr-report:threshold-->\n\n')
+ v1_comment_write_df(db, threshold_df, md)
+
+ if increase_df is not None:
+ summary = v1_comment_summary(increase_df)
+ md.write('<details>\n')
+ md.write(f'<summary>Increases ({summary})</summary>\n')
+ md.write('<!--ghr-report:increases-->\n\n')
+ v1_comment_write_df(db, increase_df, md)
+ md.write('</details>\n\n')
+
+ if decrease_df is not None:
+ summary = v1_comment_summary(decrease_df)
+ md.write('<details>\n')
+ md.write(f'<summary>Decreases ({summary})</summary>\n')
+ md.write('<!--ghr-report:decreases-->\n\n')
+ v1_comment_write_df(db, decrease_df, md)
+ md.write('</details>\n\n')
+
+ summary = v1_comment_summary(df)
+ md.write('<details>\n')
+ md.write(f'<summary>Full report ({summary})</summary>\n')
+ md.write('<!--ghr-report:full-->\n\n')
+ v1_comment_write_df(db, df, md)
+ md.write('\n</details>\n')
+
+ return md.getvalue()
+
+
+def v1_comment_merge(df: pd.DataFrame, comment) -> pd.DataFrame:
+ with io.StringIO(comment.body) as body:
+ for line in body:
+ if line.startswith('<!--ghr-report:full-->'):
+ body.readline() # Blank line before table.
+ header, rows = read_hierified(body)
+ break
+ logging.debug('REC: read %d rows', len(rows))
+ df = df.append(pd.DataFrame(data=rows, columns=header).astype(df.dtypes))
+ return df.sort_values(
+ by=['platform', 'target', 'config', 'section']).drop_duplicates()
+
+
+def read_hierified(f):
+ """Read a markdown table in ‘hierified’ format."""
+
+ line = f.readline()
+ header = tuple((s.strip() for s in line.split('|')[1:-1]))
+
+ _ = f.readline() # The line under the header.
+
+ rows = []
+ for line in f:
+ line = line.strip()
+ if not line:
+ break
+ row = []
+ columns = line.split('|')
+ for i in range(0, len(header)):
+ column = columns[i + 1].strip()
+ if not column:
+ column = rows[-1][i]
+ row.append(column)
+ rows.append(tuple(row))
+
+ return (header, rows)
+
+
+def v1_comment_write_df(db: SizeDatabase, df: pd.DataFrame,
+ out: memdf.report.OutputOption):
+ memdf.report.write_df(db.config,
+ df,
+ out,
+ 'pipe',
+ hierify=True,
+ title=False,
+ tabulate={'floatfmt': '5.1f'})
+
+
+def v1_comment_summary(df: pd.DataFrame) -> str:
+ count = df[['platform', 'target', 'config']].drop_duplicates().shape[0]
+ platforms = ', '.join(sorted(list(set(df['platform']))))
+ return f'{count} build{"" if count == 1 else "s"} for {platforms}'
+
+
 def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]:
  """Report on all new comparable commits."""
  if not (db.config['report.pr'] or db.config['report.push']):
@@ -542,30 +650,24 @@ def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]:
 
  report_push = db.config['report.push']
  report_pr = db.config['report.pr']
+ only_pr = db.config['github.limit-pr']
 
  dfs = {}
  for pr, commit, parent in db.select_matching_commits().fetchall():
  if not (report_pr if pr else report_push):
  continue
 
+ # Github doesn't have a way to fetch artifacts associated with a
+ # particular PR. For testing purposes, filter to a single PR here.
+ if only_pr and pr != only_pr:
+ continue
+
  df = changes_for_commit(db, pr, commit, parent)
  dfs[df.attrs['name']] = df
 
- if threshold := db.config['report.increases']:
- tdf = df[df['% change'] > threshold]
- else:
- tdf = None
- if tdf is not None and not tdf.empty:
- commit = df.attrs['commit']
- parent = df.attrs['parent']
- tdf.attrs['name'] = f'L,{commit},{parent}'
- tdf.attrs['title'] = (
- f'Increases above {threshold:.1f}% from {parent} to {commit}')
- dfs[tdf.attrs['name']] = tdf
-
  if (pr and comment_enabled
  and (comment_limit == 0 or comment_limit > comment_count)):
- if gh_send_change_report(db, df, tdf):
+ if gh_send_change_report(db, df):
  # Mark the originating builds, and remove the originating
  # artifacts, so that they don't generate duplicate report
  # comments.

diff --git a/scripts/tools/memory/memdf/util/sqlite.py b/scripts/tools/memory/memdf/util/sqlite.py
@@ -97,14 +97,18 @@ def store(self, table: str, **kwargs):
  self.connection().execute(q, v)
 
  def get_matching(self, table: str, columns: List[str], **kwargs):
- return self.connection().execute(
- f"SELECT {','.join(columns)} FROM {table}"
-  f" WHERE {'=? AND '.join(kwargs.keys())}=?",
-  list(kwargs.values()))
+ q = (f"SELECT {','.join(columns)} FROM {table}"
+  f" WHERE {'=? AND '.join(kwargs.keys())}=?")
+ v = list(kwargs.values())
+ return self.connection().execute(q, v)
 
  def get_matching_id(self, table: str, **kwargs):
- return self.get_matching(table, ['id'], **kwargs).fetchone()[0]
+ cur = self.get_matching(table, ['id'], **kwargs)
+ row = cur.fetchone()
+ if row:
+ return row[0]
+ return None
 
- def store_and_return_id(self, table: str, **kwargs) -> int:
+ def store_and_return_id(self, table: str, **kwargs) -> Optional[int]:
  self.store(table, **kwargs)
  return self.get_matching_id(table, **kwargs)