download_benchmarks: Script for downloading benchmark results from CircleCI
cameel committed Mar 28, 2022
1 parent b3e3669 commit 40894e9
Showing 5 changed files with 630 additions and 3 deletions.
13 changes: 10 additions & 3 deletions .circleci/config.yml
@@ -666,9 +666,16 @@ jobs:
           name: Install pip
           command: apt -q update && apt install -y python3-pip
       - run:
-          name: Install pylint
-          command: python3 -m pip install pylint z3-solver pygments-lexer-solidity parsec tabulate deepdiff colorama
-          # also z3-solver, parsec and tabulate to make sure pylint knows about this module, pygments-lexer-solidity for docs
+          name: Install pylint and dependencies of the scripts that will be linted
+          command: python3 -m pip install
+              pylint
+              z3-solver
+              pygments-lexer-solidity
+              parsec
+              tabulate
+              deepdiff
+              colorama
+              requests
       - run:
           name: Linting Python Scripts
           command: ./scripts/pylint_all.py
19 changes: 19 additions & 0 deletions scripts/common/git_helpers.py
@@ -0,0 +1,19 @@
import subprocess


def run_git_command(command):
    process = subprocess.run(
        command,
        encoding='utf8',
        capture_output=True,
        check=True,
    )
    return process.stdout.strip()


def git_current_branch():
    return run_git_command(['git', 'symbolic-ref', 'HEAD', '--short'])


def git_commit_hash(ref: str = 'HEAD'):
    return run_git_command(['git', 'rev-parse', '--verify', ref])
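
For illustration (not part of this diff), a minimal usage sketch of these helpers, assuming scripts/ is on sys.path the way download_benchmarks.py below arranges it:

    from common.git_helpers import git_current_branch, git_commit_hash

    # Both helpers shell out to git and return the trimmed stdout;
    # check=True makes a non-zero git exit status raise subprocess.CalledProcessError.
    branch = git_current_branch()           # e.g. 'develop'
    head_hash = git_commit_hash()           # full hash of HEAD
    branch_hash = git_commit_hash(branch)   # hash of the branch tip
    print(branch, head_hash, branch_hash)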
171 changes: 171 additions & 0 deletions scripts/common/rest_api_helpers.py
@@ -0,0 +1,171 @@
from pathlib import Path
from typing import List, Mapping, Optional
import functools
import json
import operator
import shutil

import requests


class APIHelperError(Exception):
    pass

class DataUnavailable(APIHelperError):
    pass

class InvalidResponse(APIHelperError):
    pass

class FileAlreadyExists(APIHelperError):
    pass


def query_api(url: str, params: Mapping[str, str], debug_requests=False) -> dict:
    if debug_requests:
        print(f'REQUEST URL: {url}')
        if len(params) > 0:
            print(f'QUERY: {params}')

    response = requests.get(url, params=params)
    response.raise_for_status()

    if debug_requests:
        json_response = response.json()
        print('========== RESPONSE ==========')
        if json_response is not None:
            print(json.dumps(json_response, indent=4))
        else:
            print(response.content)
        print('==============================')

    return response.json()


def download_file(url: str, target_path: Path, overwrite=False):
    if not overwrite and target_path.exists():
        raise FileAlreadyExists(f"Refusing to overwrite existing file: '{target_path}'.")

    with requests.get(url, stream=True) as request:
        with open(target_path, 'wb') as target_file:
            shutil.copyfileobj(request.raw, target_file)


class Github:
    BASE_URL = 'https://api.github.com'

    project_slug: str
    debug_requests: bool

    def __init__(self, project_slug: str, debug_requests: bool):
        self.project_slug = project_slug
        self.debug_requests = debug_requests

    def pull_request(self, pr_id: int) -> dict:
        return query_api(
            f'{self.BASE_URL}/repos/{self.project_slug}/pulls/{pr_id}',
            {},
            self.debug_requests
        )


class CircleCI:
    # None might be a more logical default for max_pages but in most cases we'll actually
    # want some limit to prevent flooding the API with requests in case of a bug.
    DEFAULT_MAX_PAGES = 10
    BASE_URL = 'https://circleci.com/api/v2'

    project_slug: str
    debug_requests: bool

    def __init__(self, project_slug: str, debug_requests: bool):
        self.project_slug = project_slug
        self.debug_requests = debug_requests

    def paginated_query_api_iterator(self, url: str, params: Mapping[str, str], max_pages: int = DEFAULT_MAX_PAGES):
        assert 'page-token' not in params

        page_count = 0
        next_page_token = None
        while max_pages is None or page_count < max_pages:
            if next_page_token is not None:
                params = {**params, 'page-token': next_page_token}

            json_response = query_api(url, params, self.debug_requests)

            yield json_response['items']
            next_page_token = json_response['next_page_token']
            page_count += 1
            if next_page_token is None:
                break

    def paginated_query_api(self, url: str, params: Mapping[str, str], max_pages: int = DEFAULT_MAX_PAGES):
        return functools.reduce(operator.add, self.paginated_query_api_iterator(url, params, max_pages), [])

    def pipelines(
        self,
        branch: Optional[str] = None,
        commit_hash: Optional[str] = None,
        excluded_trigger_types: List[str] = None,
    ) -> List[dict]:
        if excluded_trigger_types is None:
            excluded_trigger_types = []

        for items in self.paginated_query_api_iterator(
            f'{self.BASE_URL}/project/gh/{self.project_slug}/pipeline',
            {'branch': branch} if branch is not None else {},
            max_pages=10,
        ):
            matching_items = [
                item
                for item in items
                if (
                    (commit_hash is None or item['vcs']['revision'] == commit_hash) and
                    item['trigger']['type'] not in excluded_trigger_types
                )
            ]
            if len(matching_items) > 0:
                return matching_items

        return []

    def workflows(self, pipeline_id: str) -> dict:
        return self.paginated_query_api(f'{self.BASE_URL}/pipeline/{pipeline_id}/workflow', {})

    def jobs(self, workflow_id: str) -> Mapping[str, dict]:
        items = self.paginated_query_api(f'{self.BASE_URL}/workflow/{workflow_id}/job', {})
        jobs_by_name = {job['name']: job for job in items}

        assert len(jobs_by_name) <= len(items)
        if len(jobs_by_name) < len(items):
            raise InvalidResponse("Job names in the workflow are not unique.")

        return jobs_by_name

    def job(self, workflow_id: str, name: str, require_success: bool = False) -> dict:
        jobs = self.jobs(workflow_id)
        if name not in jobs:
            raise DataUnavailable(f"Job {name} is not present in the workflow.")

        if require_success and jobs[name]['status'] != 'success':
            raise DataUnavailable(
                f"Job {name} has failed or is still running. "
                f"Current status: {jobs[name]['status']}."
            )

        return jobs[name]

    def artifacts(self, job_number: int) -> Mapping[str, dict]:
        items = self.paginated_query_api(f'{self.BASE_URL}/project/gh/{self.project_slug}/{job_number}/artifacts', {})
        artifacts_by_name = {artifact['path']: artifact for artifact in items}

        assert len(artifacts_by_name) <= len(items)
        if len(artifacts_by_name) < len(items):
            raise InvalidResponse("Names of artifacts attached to the job are not unique.")

        return artifacts_by_name

    @staticmethod
    def latest_item(items: dict) -> dict:
        sorted_items = sorted(items, key=lambda item: item['created_at'], reverse=True)
        return sorted_items[0] if len(sorted_items) > 0 else None
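
For illustration (not part of this diff), a sketch of the intended call sequence, mirroring what download_benchmarks.py below does: pick the latest matching pipeline, take its latest workflow, look up a job by name and list its artifacts. The branch name and network access are assumptions here.

    from common.rest_api_helpers import CircleCI

    circleci = CircleCI('ethereum/solidity', debug_requests=False)

    # Latest pipeline on the branch that was not triggered by a schedule
    # (assumes at least one such pipeline exists).
    pipeline = circleci.latest_item(circleci.pipelines(
        branch='develop',
        excluded_trigger_types=['schedule'],
    ))

    workflow_id = circleci.latest_item(circleci.workflows(pipeline['id']))['id']
    job = circleci.job(workflow_id, 'c_ext_benchmarks', require_success=True)

    # Artifacts are keyed by their path within the job.
    artifacts = circleci.artifacts(int(job['job_number']))
    print(list(artifacts))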
172 changes: 172 additions & 0 deletions scripts/externalTests/download_benchmarks.py
@@ -0,0 +1,172 @@
#!/usr/bin/env python3

from argparse import ArgumentParser, Namespace
from pathlib import Path
from typing import Mapping, Optional
import sys

import requests

# Our scripts/ is not a proper Python package so we need to modify PYTHONPATH to import from it
# pragma pylint: disable=import-error,wrong-import-position
SCRIPTS_DIR = Path(__file__).parent.parent
sys.path.insert(0, str(SCRIPTS_DIR))

from common.git_helpers import git_current_branch, git_commit_hash
from common.rest_api_helpers import APIHelperError, CircleCI, Github, download_file
# pragma pylint: enable=import-error,wrong-import-position


def process_commandline() -> Namespace:
    script_description = (
        "Downloads benchmark results attached as artifacts to the c_ext_benchmarks job on CircleCI. "
        "If no options are specified, downloads results for the currently checked out git branch."
    )

    parser = ArgumentParser(description=script_description)

    target_definition = parser.add_mutually_exclusive_group()
    target_definition.add_argument(
        '--branch',
        dest='branch',
        help="Git branch that the job ran on.",
    )
    target_definition.add_argument(
        '--pr',
        dest='pull_request_id',
        type=int,
        help="Github PR ID that the job ran on.",
    )
    target_definition.add_argument(
        '--base-of-pr',
        dest='base_of_pr',
        type=int,
        help="ID of a Github PR that's based on top of the branch we're interested in."
    )

    parser.add_argument(
        '--any-commit',
        dest='ignore_commit_hash',
        default=False,
        action='store_true',
        help="Include pipelines that ran on a different commit as long as branch/PR matches."
    )
    parser.add_argument(
        '--overwrite',
        dest='overwrite',
        default=False,
        action='store_true',
        help="If artifacts already exist on disk, overwrite them.",
    )
    parser.add_argument(
        '--debug-requests',
        dest='debug_requests',
        default=False,
        action='store_true',
        help="Print detailed info about performed API requests and received responses.",
    )

    return parser.parse_args()


def download_benchmark_artifact(
    artifacts: Mapping[str, dict],
    benchmark_name: str,
    branch: str,
    commit_hash: str,
    overwrite: bool,
    silent: bool = False
):
    if not silent:
        print(f"Downloading artifact: {benchmark_name}-{branch}-{commit_hash[:8]}.json.")

    artifact_path = f'reports/externalTests/{benchmark_name}.json'

    if artifact_path not in artifacts:
        raise RuntimeError(f"Missing artifact: {artifact_path}.")

    download_file(
        artifacts[artifact_path]['url'],
        Path(f'{benchmark_name}-{branch}-{commit_hash[:8]}.json'),
        overwrite,
    )


def download_benchmarks(
    branch: Optional[str],
    pull_request_id: Optional[int],
    base_of_pr: Optional[int],
    ignore_commit_hash: bool = False,
    overwrite: bool = False,
    debug_requests: bool = False,
    silent: bool = False,
):
    github = Github('ethereum/solidity', debug_requests)
    circleci = CircleCI('ethereum/solidity', debug_requests)

    expected_commit_hash = None
    if branch is None and pull_request_id is None and base_of_pr is None:
        branch = git_current_branch()
        expected_commit_hash = git_commit_hash()
    elif branch is not None:
        expected_commit_hash = git_commit_hash(branch)
    elif pull_request_id is not None:
        pr_info = github.pull_request(pull_request_id)
        branch = pr_info['head']['ref']
        expected_commit_hash = pr_info['head']['sha']
    elif base_of_pr is not None:
        pr_info = github.pull_request(base_of_pr)
        branch = pr_info['base']['ref']
        expected_commit_hash = pr_info['base']['sha']

    if not silent:
        print(
            f"Looking for pipelines that ran on branch {branch}" +
            (f", commit {expected_commit_hash}." if not ignore_commit_hash else " (any commit).")
        )

    pipeline = circleci.latest_item(circleci.pipelines(
        branch,
        expected_commit_hash if not ignore_commit_hash else None,
        # Skip nightly workflows. They don't have the c_ext_benchmarks job and even if they did,
        # they would likely be running a different set of external tests.
        excluded_trigger_types=['schedule'],
    ))
    if pipeline is None:
        raise RuntimeError("No matching pipelines found.")

    actual_commit_hash = pipeline['vcs']['revision']
    workflow_id = circleci.latest_item(circleci.workflows(pipeline['id']))['id']
    benchmark_collector_job = circleci.job(workflow_id, 'c_ext_benchmarks', require_success=True)

    artifacts = circleci.artifacts(int(benchmark_collector_job['job_number']))

    download_benchmark_artifact(artifacts, 'summarized-benchmarks', branch, actual_commit_hash, overwrite, silent)
    download_benchmark_artifact(artifacts, 'all-benchmarks', branch, actual_commit_hash, overwrite, silent)


def main():
    try:
        options = process_commandline()
        download_benchmarks(
            options.branch,
            options.pull_request_id,
            options.base_of_pr,
            options.ignore_commit_hash,
            options.overwrite,
            options.debug_requests,
        )

        return 0
    except APIHelperError as exception:
        print(f"[ERROR] {exception}", file=sys.stderr)
        return 1
    except requests.exceptions.HTTPError as exception:
        print(f"[ERROR] {exception}", file=sys.stderr)
        return 1
    except RuntimeError as exception:
        print(f"[ERROR] {exception}", file=sys.stderr)
        return 1

if __name__ == '__main__':
    sys.exit(main())
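
For illustration (not part of this diff), the downloader can also be driven from another script rather than the command line; a sketch with a placeholder PR number, assuming scripts/externalTests/ is put on sys.path since scripts/ is not a Python package:

    import sys
    from pathlib import Path

    # Make the script importable as a module (illustrative setup only).
    sys.path.insert(0, str(Path('scripts/externalTests')))
    from download_benchmarks import download_benchmarks

    # Roughly equivalent to: scripts/externalTests/download_benchmarks.py --pr <number> --overwrite
    download_benchmarks(
        branch=None,
        pull_request_id=12345,  # placeholder PR number
        base_of_pr=None,
        overwrite=True,
    )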