download_benchmarks: Script for downloading benchmark results from CircleCI
Showing 5 changed files with 630 additions and 3 deletions.
@@ -0,0 +1,19 @@
import subprocess


def run_git_command(command):
    process = subprocess.run(
        command,
        encoding='utf8',
        capture_output=True,
        check=True,
    )
    return process.stdout.strip()


def git_current_branch():
    return run_git_command(['git', 'symbolic-ref', 'HEAD', '--short'])


def git_commit_hash(ref: str = 'HEAD'):
    return run_git_command(['git', 'rev-parse', '--verify', ref])
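The two helpers above are thin wrappers around git. A minimal usage sketch (my illustration, not part of the commit), assuming it runs inside a git checkout and that the scripts/ directory is on sys.path so the common.git_helpers import resolves, as the main script below arranges:

# Illustrative only: assumes a git checkout and scripts/ on sys.path.
from common.git_helpers import git_current_branch, git_commit_hash

print(git_current_branch())       # e.g. 'my-feature-branch'; fails on a detached HEAD
print(git_commit_hash())          # full hash of the current HEAD commit
print(git_commit_hash('HEAD~1'))  # any ref accepted by 'git rev-parse --verify' works

Because run_git_command() passes check=True to subprocess.run(), a failing git command raises subprocess.CalledProcessError instead of returning an error string.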
@@ -0,0 +1,171 @@
from pathlib import Path
from typing import List, Mapping, Optional
import functools
import json
import operator
import shutil

import requests


class APIHelperError(Exception):
    pass

class DataUnavailable(APIHelperError):
    pass

class InvalidResponse(APIHelperError):
    pass

class FileAlreadyExists(APIHelperError):
    pass


def query_api(url: str, params: Mapping[str, str], debug_requests=False) -> dict:
    if debug_requests:
        print(f'REQUEST URL: {url}')
        if len(params) > 0:
            print(f'QUERY: {params}')

    response = requests.get(url, params=params)
    response.raise_for_status()

    if debug_requests:
        json_response = response.json()
        print('========== RESPONSE ==========')
        if json_response is not None:
            print(json.dumps(json_response, indent=4))
        else:
            print(response.content)
        print('==============================')

    return response.json()


def download_file(url: str, target_path: Path, overwrite=False):
    if not overwrite and target_path.exists():
        raise FileAlreadyExists(f"Refusing to overwrite existing file: '{target_path}'.")

    with requests.get(url, stream=True) as request:
        with open(target_path, 'wb') as target_file:
            shutil.copyfileobj(request.raw, target_file)


class Github:
    BASE_URL = 'https://api.github.com'

    project_slug: str
    debug_requests: bool

    def __init__(self, project_slug: str, debug_requests: bool):
        self.project_slug = project_slug
        self.debug_requests = debug_requests

    def pull_request(self, pr_id: int) -> dict:
        return query_api(
            f'{self.BASE_URL}/repos/{self.project_slug}/pulls/{pr_id}',
            {},
            self.debug_requests
        )


class CircleCI:
    # None might be a more logical default for max_pages but in most cases we'll actually
    # want some limit to prevent flooding the API with requests in case of a bug.
    DEFAULT_MAX_PAGES = 10
    BASE_URL = 'https://circleci.com/api/v2'

    project_slug: str
    debug_requests: bool

    def __init__(self, project_slug: str, debug_requests: bool):
        self.project_slug = project_slug
        self.debug_requests = debug_requests

    def paginated_query_api_iterator(self, url: str, params: Mapping[str, str], max_pages: Optional[int] = DEFAULT_MAX_PAGES):
        assert 'page-token' not in params

        page_count = 0
        next_page_token = None
        while max_pages is None or page_count < max_pages:
            if next_page_token is not None:
                params = {**params, 'page-token': next_page_token}

            json_response = query_api(url, params, self.debug_requests)

            yield json_response['items']
            next_page_token = json_response['next_page_token']
            page_count += 1
            if next_page_token is None:
                break

    def paginated_query_api(self, url: str, params: Mapping[str, str], max_pages: Optional[int] = DEFAULT_MAX_PAGES):
        return functools.reduce(operator.add, self.paginated_query_api_iterator(url, params, max_pages), [])

    def pipelines(
        self,
        branch: Optional[str] = None,
        commit_hash: Optional[str] = None,
        excluded_trigger_types: Optional[List[str]] = None,
    ) -> List[dict]:
        if excluded_trigger_types is None:
            excluded_trigger_types = []

        for items in self.paginated_query_api_iterator(
            f'{self.BASE_URL}/project/gh/{self.project_slug}/pipeline',
            {'branch': branch} if branch is not None else {},
            max_pages=10,
        ):
            matching_items = [
                item
                for item in items
                if (
                    (commit_hash is None or item['vcs']['revision'] == commit_hash) and
                    item['trigger']['type'] not in excluded_trigger_types
                )
            ]
            if len(matching_items) > 0:
                return matching_items

        return []

    def workflows(self, pipeline_id: str) -> List[dict]:
        return self.paginated_query_api(f'{self.BASE_URL}/pipeline/{pipeline_id}/workflow', {})

    def jobs(self, workflow_id: str) -> Mapping[str, dict]:
        items = self.paginated_query_api(f'{self.BASE_URL}/workflow/{workflow_id}/job', {})
        jobs_by_name = {job['name']: job for job in items}

        assert len(jobs_by_name) <= len(items)
        if len(jobs_by_name) < len(items):
            raise InvalidResponse("Job names in the workflow are not unique.")

        return jobs_by_name

    def job(self, workflow_id: str, name: str, require_success: bool = False) -> dict:
        jobs = self.jobs(workflow_id)
        if name not in jobs:
            raise DataUnavailable(f"Job {name} is not present in the workflow.")

        if require_success and jobs[name]['status'] != 'success':
            raise DataUnavailable(
                f"Job {name} has failed or is still running. "
                f"Current status: {jobs[name]['status']}."
            )

        return jobs[name]

    def artifacts(self, job_number: int) -> Mapping[str, dict]:
        items = self.paginated_query_api(f'{self.BASE_URL}/project/gh/{self.project_slug}/{job_number}/artifacts', {})
        artifacts_by_name = {artifact['path']: artifact for artifact in items}

        assert len(artifacts_by_name) <= len(items)
        if len(artifacts_by_name) < len(items):
            raise InvalidResponse("Names of artifacts attached to the job are not unique.")

        return artifacts_by_name

    @staticmethod
    def latest_item(items: List[dict]) -> Optional[dict]:
        sorted_items = sorted(items, key=lambda item: item['created_at'], reverse=True)
        return sorted_items[0] if len(sorted_items) > 0 else None
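The CircleCI wrapper can also be used on its own, outside the download script that follows. A minimal sketch (my illustration; the branch name is a placeholder, and the calls query the live CircleCI API, so they can raise requests.exceptions.HTTPError or the APIHelperError subclasses defined above):

# Illustrative only: 'my-feature-branch' is a placeholder branch name.
from common.rest_api_helpers import CircleCI

circleci = CircleCI('ethereum/solidity', debug_requests=False)

# Newest pipeline on the branch, excluding scheduled (nightly) runs.
pipelines = circleci.pipelines(branch='my-feature-branch', excluded_trigger_types=['schedule'])
latest_pipeline = CircleCI.latest_item(pipelines)

if latest_pipeline is not None:
    for workflow in circleci.workflows(latest_pipeline['id']):
        print(workflow['id'])

Note that pipelines() returns the matches from the first page of results that contains any, and pagination is capped at 10 pages, so very old pipelines may not be found.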
@@ -0,0 +1,172 @@
#!/usr/bin/env python3

from argparse import ArgumentParser, Namespace
from pathlib import Path
from typing import Mapping, Optional
import sys

import requests

# Our scripts/ is not a proper Python package so we need to modify PYTHONPATH to import from it
# pragma pylint: disable=import-error,wrong-import-position
SCRIPTS_DIR = Path(__file__).parent.parent
sys.path.insert(0, str(SCRIPTS_DIR))

from common.git_helpers import git_current_branch, git_commit_hash
from common.rest_api_helpers import APIHelperError, CircleCI, Github, download_file
# pragma pylint: enable=import-error,wrong-import-position


def process_commandline() -> Namespace:
    script_description = (
        "Downloads benchmark results attached as artifacts to the c_ext_benchmarks job on CircleCI. "
        "If no options are specified, downloads results for the currently checked out git branch."
    )

    parser = ArgumentParser(description=script_description)

    target_definition = parser.add_mutually_exclusive_group()
    target_definition.add_argument(
        '--branch',
        dest='branch',
        help="Git branch that the job ran on.",
    )
    target_definition.add_argument(
        '--pr',
        dest='pull_request_id',
        type=int,
        help="Github PR ID that the job ran on.",
    )
    target_definition.add_argument(
        '--base-of-pr',
        dest='base_of_pr',
        type=int,
        help="ID of a Github PR that's based on top of the branch we're interested in."
    )

    parser.add_argument(
        '--any-commit',
        dest='ignore_commit_hash',
        default=False,
        action='store_true',
        help="Include pipelines that ran on a different commit as long as branch/PR matches."
    )
    parser.add_argument(
        '--overwrite',
        dest='overwrite',
        default=False,
        action='store_true',
        help="If artifacts already exist on disk, overwrite them.",
    )
    parser.add_argument(
        '--debug-requests',
        dest='debug_requests',
        default=False,
        action='store_true',
        help="Print detailed info about performed API requests and received responses.",
    )

    return parser.parse_args()


def download_benchmark_artifact(
    artifacts: Mapping[str, dict],
    benchmark_name: str,
    branch: str,
    commit_hash: str,
    overwrite: bool,
    silent: bool = False
):
    if not silent:
        print(f"Downloading artifact: {benchmark_name}-{branch}-{commit_hash[:8]}.json.")

    artifact_path = f'reports/externalTests/{benchmark_name}.json'

    if artifact_path not in artifacts:
        raise RuntimeError(f"Missing artifact: {artifact_path}.")

    download_file(
        artifacts[artifact_path]['url'],
        Path(f'{benchmark_name}-{branch}-{commit_hash[:8]}.json'),
        overwrite,
    )


def download_benchmarks(
    branch: Optional[str],
    pull_request_id: Optional[int],
    base_of_pr: Optional[int],
    ignore_commit_hash: bool = False,
    overwrite: bool = False,
    debug_requests: bool = False,
    silent: bool = False,
):
    github = Github('ethereum/solidity', debug_requests)
    circleci = CircleCI('ethereum/solidity', debug_requests)

    expected_commit_hash = None
    if branch is None and pull_request_id is None and base_of_pr is None:
        branch = git_current_branch()
        expected_commit_hash = git_commit_hash()
    elif branch is not None:
        expected_commit_hash = git_commit_hash(branch)
    elif pull_request_id is not None:
        pr_info = github.pull_request(pull_request_id)
        branch = pr_info['head']['ref']
        expected_commit_hash = pr_info['head']['sha']
    elif base_of_pr is not None:
        pr_info = github.pull_request(base_of_pr)
        branch = pr_info['base']['ref']
        expected_commit_hash = pr_info['base']['sha']

    if not silent:
        print(
            f"Looking for pipelines that ran on branch {branch}" +
            (f", commit {expected_commit_hash}." if not ignore_commit_hash else " (any commit).")
        )

    pipeline = circleci.latest_item(circleci.pipelines(
        branch,
        expected_commit_hash if not ignore_commit_hash else None,
        # Skip nightly workflows. They don't have the c_ext_benchmarks job and even if they did,
        # they would likely be running a different set of external tests.
        excluded_trigger_types=['schedule'],
    ))
    if pipeline is None:
        raise RuntimeError("No matching pipelines found.")

    actual_commit_hash = pipeline['vcs']['revision']
    workflow_id = circleci.latest_item(circleci.workflows(pipeline['id']))['id']
    benchmark_collector_job = circleci.job(workflow_id, 'c_ext_benchmarks', require_success=True)

    artifacts = circleci.artifacts(int(benchmark_collector_job['job_number']))

    download_benchmark_artifact(artifacts, 'summarized-benchmarks', branch, actual_commit_hash, overwrite, silent)
    download_benchmark_artifact(artifacts, 'all-benchmarks', branch, actual_commit_hash, overwrite, silent)


def main():
    try:
        options = process_commandline()
        download_benchmarks(
            options.branch,
            options.pull_request_id,
            options.base_of_pr,
            options.ignore_commit_hash,
            options.overwrite,
            options.debug_requests,
        )

        return 0
    except APIHelperError as exception:
        print(f"[ERROR] {exception}", file=sys.stderr)
        return 1
    except requests.exceptions.HTTPError as exception:
        print(f"[ERROR] {exception}", file=sys.stderr)
        return 1
    except RuntimeError as exception:
        print(f"[ERROR] {exception}", file=sys.stderr)
        return 1

if __name__ == '__main__':
    sys.exit(main())
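For completeness, a few hedged invocation examples (the script's file name and location are my assumption based on the commit message and the sys.path manipulation above; branch and PR values are placeholders). On success the script writes summarized-benchmarks-<branch>-<first 8 hash characters>.json and all-benchmarks-<branch>-<first 8 hash characters>.json into the current working directory:

# Illustrative shell invocations (placeholders, not part of the commit):
#   ./download_benchmarks.py                                   # current branch and commit
#   ./download_benchmarks.py --branch my-feature-branch --any-commit
#   ./download_benchmarks.py --base-of-pr 12345 --overwrite
#
# The same logic is importable; note that --branch requires the branch to exist
# in the local checkout, since the script resolves its commit hash via git.
from download_benchmarks import download_benchmarks

download_benchmarks(branch='my-feature-branch', pull_request_id=None, base_of_pr=None, overwrite=True)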