Benchmark downloader (script + CI job) #12818

Merged (2 commits) on May 23, 2022
62 changes: 59 additions & 3 deletions .circleci/config.yml
@@ -690,9 +690,16 @@ jobs:
           name: Install pip
           command: apt -q update && apt install -y python3-pip
       - run:
-          name: Install pylint
-          command: python3 -m pip install pylint z3-solver pygments-lexer-solidity parsec tabulate deepdiff colorama
-          # also z3-solver, parsec and tabulate to make sure pylint knows about this module, pygments-lexer-solidity for docs
+          name: Install pylint and dependencies of the scripts that will be linted
+          command: python3 -m pip install
+              pylint
+              z3-solver
+              pygments-lexer-solidity
+              parsec
+              tabulate
+              deepdiff
+              colorama
+              requests
       - run:
           name: Linting Python Scripts
           command: ./scripts/pylint_all.py
@@ -1227,16 +1234,65 @@ jobs:
       - checkout
       - attach_workspace:
           at: .
+      - run:
+          name: Install dependencies of helper scripts
+          command: |
+            sudo apt update
+            sudo apt install python3-pip --assume-yes --no-install-recommends
+            python3 -m pip install requests --user
+      - run:
+          name: Combine benchmark reports
+          command: cat reports/externalTests/benchmark-*.json | scripts/externalTests/merge_benchmarks.sh > reports/externalTests/all-benchmarks.json
+      - run:
+          name: Summarize reports
+          command: cat reports/externalTests/all-benchmarks.json | scripts/externalTests/summarize_benchmarks.sh > reports/externalTests/summarized-benchmarks.json
+      - run:
+          name: Download reports from base branch
+          command: |
+            if [[ $CIRCLE_PULL_REQUEST != "" ]]; then
+              mkdir reports/externalTests/base-branch/
+              cd reports/externalTests/base-branch/
+
+              pr_id=$(echo "$CIRCLE_PULL_REQUEST" | sed 's|\(.*\)\/||')
+              scripts_dir=../../../scripts
+
+              "${scripts_dir}/externalTests/download_benchmarks.py" --base-of-pr "$pr_id"
+            fi
+      - run:
+          name: Diff benchmarks
+          command: |
+            if [[ $CIRCLE_PULL_REQUEST != "" ]]; then
+              cd reports/externalTests/
+              mkdir diff/
+              scripts_dir=../../scripts
+
+              "${scripts_dir}/externalTests/benchmark_diff.py" table \
+                --output-format markdown \
+                --style humanized \
+                base-branch/summarized-benchmarks-*.json \
+                summarized-benchmarks.json > diff/benchmark-diff-summarized-table-markdown-humanized.md
+              "${scripts_dir}/externalTests/benchmark_diff.py" table \
+                --output-format markdown \
+                --style absolute \
+                base-branch/summarized-benchmarks-*.json \
+                summarized-benchmarks.json > diff/benchmark-diff-summarized-table-markdown-absolute.md
+              "${scripts_dir}/externalTests/benchmark_diff.py" inplace \
+                --style absolute \
+                base-branch/summarized-benchmarks-*.json \
+                summarized-benchmarks.json > diff/benchmark-diff-summarized-inplace-absolute.md
+              "${scripts_dir}/externalTests/benchmark_diff.py" inplace \
+                --style absolute \
+                base-branch/all-benchmarks-*.json \
+                all-benchmarks.json > diff/benchmark-diff-all-table-inplace-absolute.md
+            fi

@cameel (Member, Author) commented on lines +1269 to +1286 (Mar 23, 2022):

    We might not need all of these. benchmark-diff-summarized-table-markdown-humanized.md will probably be the most useful one in practice. It will soon be possible to include extra columns with absolute values in it as well.

    On the other hand, the overhead of having all of them is negligible, and this also acts as an end-to-end test for the script, so I included them all here anyway.
+      - store_artifacts:
+          path: reports/externalTests/all-benchmarks.json
+      - store_artifacts:
+          path: reports/externalTests/summarized-benchmarks.json
+      - store_artifacts:
+          path: reports/externalTests/diff/
+      - store_artifacts:
+          path: reports/externalTests/base-branch/

   b_win: &b_win
     <<: *base_win_powershell_large
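download_benchmarks.py itself is not part of this excerpt, but it builds on the helpers added below in scripts/common/. A rough sketch of what resolving --base-of-pr and fetching the artifacts could look like with those helpers (the job name, artifact path and import path are assumptions, not taken from the actual script):

    # Hypothetical sketch only; the real logic lives in download_benchmarks.py.
    # See scripts/common/rest_api_helpers.py below for the helpers used here.
    from pathlib import Path

    from common.rest_api_helpers import CircleCI, Github, download_file

    def download_base_branch_benchmarks(pr_id: int):
        github = Github('ethereum/solidity', debug_requests=False)
        circleci = CircleCI('ethereum/solidity', debug_requests=False)

        # The PR's base branch comes from the GitHub pull request API.
        base_branch = github.pull_request(pr_id)['base']['ref']

        # Walk pipeline -> workflow -> job -> artifact on CircleCI.
        pipeline = CircleCI.latest_item(circleci.pipelines(branch=base_branch))
        workflow = CircleCI.latest_item(circleci.workflows(pipeline['id']))
        job = circleci.job(workflow['id'], 'c_ext_benchmarks', require_success=True)  # job name is a guess

        artifacts = circleci.artifacts(int(job['job_number']))
        url = artifacts['reports/externalTests/summarized-benchmarks.json']['url']
        download_file(url, Path('summarized-benchmarks.json'))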
19 changes: 19 additions & 0 deletions scripts/common/git_helpers.py
@@ -0,0 +1,19 @@
import subprocess


def run_git_command(command):
    process = subprocess.run(
        command,
        encoding='utf8',
        capture_output=True,
        check=True,
    )
    return process.stdout.strip()


def git_current_branch():
    return run_git_command(['git', 'symbolic-ref', 'HEAD', '--short'])


def git_commit_hash(ref: str = 'HEAD'):
    return run_git_command(['git', 'rev-parse', '--verify', ref])
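These helpers wrap one-off git invocations used by the benchmark scripts. A quick usage sketch, assuming scripts/common/ is on the Python import path:

    from common.git_helpers import git_current_branch, git_commit_hash

    print(git_current_branch())         # e.g. 'develop'
    print(git_commit_hash())            # full hash of HEAD
    print(git_commit_hash('develop'))   # any ref accepted by git rev-parse works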
171 changes: 171 additions & 0 deletions scripts/common/rest_api_helpers.py
@@ -0,0 +1,171 @@
from pathlib import Path
from typing import List, Mapping, Optional
import functools
import json
import operator
import shutil

import requests


class APIHelperError(Exception):
    pass

class DataUnavailable(APIHelperError):
    pass

class InvalidResponse(APIHelperError):
    pass

class FileAlreadyExists(APIHelperError):
    pass


def query_api(url: str, params: Mapping[str, str], debug_requests=False) -> dict:
    if debug_requests:
        print(f'REQUEST URL: {url}')
        if len(params) > 0:
            print(f'QUERY: {params}')

    response = requests.get(url, params=params)
    response.raise_for_status()

    if debug_requests:
        json_response = response.json()
        print('========== RESPONSE ==========')
        if json_response is not None:
            print(json.dumps(json_response, indent=4))
        else:
            print(response.content)
        print('==============================')

    return response.json()
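query_api() is a thin wrapper over requests.get() that raises on HTTP errors and returns the parsed JSON body, optionally dumping the whole exchange. For example, a single unauthenticated GitHub API call (rate limits apply):

    repo_info = query_api('https://api.github.com/repos/ethereum/solidity', {}, debug_requests=True)
    print(repo_info['default_branch'])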


def download_file(url: str, target_path: Path, overwrite=False):
    if not overwrite and target_path.exists():
        raise FileAlreadyExists(f"Refusing to overwrite existing file: '{target_path}'.")

    with requests.get(url, stream=True) as request:
        with open(target_path, 'wb') as target_file:
            shutil.copyfileobj(request.raw, target_file)
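With stream=True the body is copied from the network stream straight to disk by shutil.copyfileobj(), so large artifacts never have to fit in memory; the .raw attribute is the undecoded byte stream. Note there is no raise_for_status() here, so a failed download would write the error body to the target file rather than raising.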


class Github:
    BASE_URL = 'https://api.github.com'

    project_slug: str
    debug_requests: bool

    def __init__(self, project_slug: str, debug_requests: bool):
        self.project_slug = project_slug
        self.debug_requests = debug_requests

    def pull_request(self, pr_id: int) -> dict:
        return query_api(
            f'{self.BASE_URL}/repos/{self.project_slug}/pulls/{pr_id}',
            {},
            self.debug_requests
        )
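pull_request() is the only GitHub endpoint the downloader needs; among other fields, the returned JSON contains base.ref and base.sha, which is presumably how download_benchmarks.py turns --base-of-pr into a branch and commit to look up on CircleCI.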


class CircleCI:
    # None might be a more logical default for max_pages but in most cases we'll actually
    # want some limit to prevent flooding the API with requests in case of a bug.
    DEFAULT_MAX_PAGES = 10
    BASE_URL = 'https://circleci.com/api/v2'

    project_slug: str
    debug_requests: bool

    def __init__(self, project_slug: str, debug_requests: bool):
        self.project_slug = project_slug
        self.debug_requests = debug_requests

    def paginated_query_api_iterator(self, url: str, params: Mapping[str, str], max_pages: Optional[int] = DEFAULT_MAX_PAGES):
        assert 'page-token' not in params

        page_count = 0
        next_page_token = None
        while max_pages is None or page_count < max_pages:
            if next_page_token is not None:
                params = {**params, 'page-token': next_page_token}

            json_response = query_api(url, params, self.debug_requests)

            yield json_response['items']
            next_page_token = json_response['next_page_token']
            page_count += 1
            if next_page_token is None:
                break

    def paginated_query_api(self, url: str, params: Mapping[str, str], max_pages: Optional[int] = DEFAULT_MAX_PAGES):
        return functools.reduce(operator.add, self.paginated_query_api_iterator(url, params, max_pages), [])
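The iterator yields one page of items at a time, feeding CircleCI's next_page_token back in as page-token until the token comes back null or max_pages is reached. paginated_query_api() then just concatenates the pages, e.g.:

    import functools
    import operator

    pages = [['job-a', 'job-b'], ['job-c']]
    print(functools.reduce(operator.add, pages, []))  # ['job-a', 'job-b', 'job-c']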

    def pipelines(
        self,
        branch: Optional[str] = None,
        commit_hash: Optional[str] = None,
        excluded_trigger_types: Optional[List[str]] = None,
    ) -> List[dict]:
        if excluded_trigger_types is None:
            excluded_trigger_types = []

        for items in self.paginated_query_api_iterator(
            f'{self.BASE_URL}/project/gh/{self.project_slug}/pipeline',
            {'branch': branch} if branch is not None else {},
            max_pages=10,
        ):
            matching_items = [
                item
                for item in items
                if (
                    (commit_hash is None or item['vcs']['revision'] == commit_hash) and
                    item['trigger']['type'] not in excluded_trigger_types
                )
            ]
            if len(matching_items) > 0:
                return matching_items

        return []
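The pipeline endpoint lists pipelines newest first, so returning the first page with any match yields the most recent matching pipelines rather than all of them, which is all the downloader needs. For example ('schedule' here is an assumption about the trigger type name reported by the API):

    circleci = CircleCI('ethereum/solidity', debug_requests=False)
    recent = circleci.pipelines(branch='develop', excluded_trigger_types=['schedule'])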

    def workflows(self, pipeline_id: str) -> List[dict]:
        return self.paginated_query_api(f'{self.BASE_URL}/pipeline/{pipeline_id}/workflow', {})

    def jobs(self, workflow_id: str) -> Mapping[str, dict]:
        items = self.paginated_query_api(f'{self.BASE_URL}/workflow/{workflow_id}/job', {})
        jobs_by_name = {job['name']: job for job in items}

        assert len(jobs_by_name) <= len(items)
        if len(jobs_by_name) < len(items):
            raise InvalidResponse("Job names in the workflow are not unique.")

        return jobs_by_name

    def job(self, workflow_id: str, name: str, require_success: bool = False) -> dict:
        jobs = self.jobs(workflow_id)
        if name not in jobs:
            raise DataUnavailable(f"Job {name} is not present in the workflow.")

        if require_success and jobs[name]['status'] != 'success':
            raise DataUnavailable(
                f"Job {name} has failed or is still running. "
                f"Current status: {jobs[name]['status']}."
            )

        return jobs[name]

    def artifacts(self, job_number: int) -> Mapping[str, dict]:
        items = self.paginated_query_api(f'{self.BASE_URL}/project/gh/{self.project_slug}/{job_number}/artifacts', {})
        artifacts_by_name = {artifact['path']: artifact for artifact in items}

        assert len(artifacts_by_name) <= len(items)
        if len(artifacts_by_name) < len(items):
            raise InvalidResponse("Names of artifacts attached to the job are not unique.")

        return artifacts_by_name

    @staticmethod
    def latest_item(items: List[dict]) -> Optional[dict]:
        sorted_items = sorted(items, key=lambda item: item['created_at'], reverse=True)
        return sorted_items[0] if len(sorted_items) > 0 else None
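latest_item() works on any item list carrying a created_at timestamp, such as the output of pipelines() or workflows(); ISO 8601 strings compare correctly as plain text, so no date parsing is needed:

    items = [
        {'id': 'older', 'created_at': '2022-03-01T10:00:00Z'},
        {'id': 'newer', 'created_at': '2022-03-23T12:00:00Z'},
    ]
    print(CircleCI.latest_item(items)['id'])  # 'newer'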