Skip to content
This repository has been archived by the owner on Oct 31, 2023. It is now read-only.

+ Docker image integrity verifier #4392

Merged
merged 8 commits into from
Jun 27, 2019
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions scripts/docker_integrity/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
This script verifies the integrity of Golem's Docker hub images.

In order to do that, a registry of docker images required by Golem is defined in
the `image_integrity.ini` file that has a format of:

```
golemfactory/image_name 1.0 sha256-hash-of-the-image
```

The registry holds entries valid for the current branch and must include only
production images.

To run verification, just launch the script:

`./scripts/docker_integrity/verify.py`

To ensure that all docker images used by Golem are included in the verification
check, add a `--verify-coverage` flag:

`./scripts/docker_integrity/verify.py --verify-coverage`

This detects situations when Golem's images have been updated without including
them in the verification and, at the same time, should prevent accidental updates
that cause non-production images to make it into the major branch.

The script checks every image listed in the registry, prints the result for
each one, and ends with a summary.

If all images are found intact, it will exit normally, with an exit code of `0`.

Should it encounter hash mismatches, it will produce a failure report and exit
with a code of `1`. It will also exit with an error if it runs into any problem
that prevents correct verification of the images.
12 changes: 12 additions & 0 deletions scripts/docker_integrity/image_integrity.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#
# NEVER PUT NON-PRODUCTION IMAGES/TAGS INTO THIS FILE
#
# repository tag hash
golemfactory/base 1.5 93c72af33f5eefaf325f594f0f46237cb07c25bbc3a1283ae91eb70761dcd035
golemfactory/blender 1.10 9d857c19e136e084edae95ba6982bb168f411e414ade50215d639f0c907df398
golemfactory/blender_nvgpu 1.4 ff84d6f5a84557eb6f2535b5bdb2caa3d4e720c96f960f934274869d7cc3aa63
golemfactory/blender_verifier 1.5 705f94c0e6944d792ac4c47330c443a0905e03c98a183ab6e8775b3717508628
golemfactory/dummy 1.2 60ba63d94c08ceebe67d8af6325fe37928d5178f0f7d340a195df5cf8d042d4b
golemfactory/glambda 1.4 2417d0fcde4a90d69b78a5552920beac8cca8d68283eace7c68ea231ea623b7b
golemfactory/nvgpu 1.4 7344c68586f06e61a1adae738d95d7dcd37306c6936c21ea06437326ba32b5f0
golemfactory/wasm 0.3.0 fea1d5c524044bd889ebea906db49a4345cce78b2c7ab2f8c4ef4e71ffbebbb4
240 changes: 240 additions & 0 deletions scripts/docker_integrity/verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
#!/usr/bin/env python
import argparse
import json
import pathlib
import re
import requests
from requests.status_codes import codes as http_codes
import sys
import typing

# Base URL of the Docker Hub registry API (v2); request paths are appended.
DOCKERHUB_URI = 'https://registry.hub.docker.com/v2/'
# Docker Hub namespace under which the images are published.
# NOTE(review): not referenced anywhere else in the visible code.
REPOSITORY_ROOT = 'golemfactory'
# Registry of expected image hashes; lives in the same directory as this file.
IMAGES_FILE = pathlib.Path(__file__).parents[0] / 'image_integrity.ini'
# Golem's own image list, resolved against the directory two levels above
# the one containing this file.
GOLEM_IMAGES_FILE = pathlib.Path(__file__).parents[2] / 'apps/images.ini'


class COLORS:
    """ANSI escape sequences used to colorize the script's terminal output."""

    # Bold green: printed around success messages.
    GREEN = '\033[1;32m'
    # Bold red: printed around failure messages.
    RED = '\033[1;31m'
    # Switches all previously applied attributes off again.
    RESET = '\033[0m'


class AuthenticationError(Exception):
    """Raised by `authenticate` when a registry token cannot be obtained."""


class ConfigurationError(Exception):
    """Raised when one of the image list files is unparsable or inconsistent."""


class CommunicationError(Exception):
    """Raised when the registry returns a response that cannot be used."""


class CoverageError(Exception):
    """Raised when an image used by Golem is missing from the hash registry."""


def get_golem_images(
        images_file: typing.Optional[pathlib.Path] = None
) -> dict:
    """Read the repository/tag pairs that Golem itself refers to.

    A line is accepted when it starts with a repository name, followed by
    one whitespace-free column (ignored here) and a tag.  Lines that do not
    match this shape, such as comments and blank lines, are skipped.

    :param images_file: file to parse; defaults to `GOLEM_IMAGES_FILE`.
    :return: mapping of repository name to tag.  If a repository is listed
        more than once, the last entry wins.
    :raises ConfigurationError: if not a single line could be parsed.
    """
    if images_file is None:
        images_file = GOLEM_IMAGES_FILE

    # Compiled once instead of being looked up again for every line.
    entry_re = re.compile(r"(?P<repo>[\w._/]+)\s+\S+\s+(?P<tag>[\w.]+)")
    images: dict = {}

    with open(images_file) as f:
        for line in f:
            m = entry_re.match(line)
            if m:
                images[m.group('repo')] = m.group('tag')

    if not images:
        raise ConfigurationError(
            "Could not parse Golem `images.ini`. Format has changed?"
        )

    return images


def get_images(
        images_file: typing.Optional[pathlib.Path] = None
) -> dict:
    """Load the registry of expected image hashes.

    Every accepted line holds a repository name, a tag and, optionally, a
    hash, separated by whitespace.  Lines that do not match this shape, such
    as `#` comments and blank lines, are skipped.

    :param images_file: file to parse; defaults to `IMAGES_FILE`.
    :return: nested mapping `{repository: {tag: hash}}`.  The hash is `None`
        for an entry whose hash column is missing.
    :raises ConfigurationError: if the same repository and tag are listed
        twice with different hashes.
    """
    if images_file is None:
        images_file = IMAGES_FILE

    # Compiled once instead of being looked up again for every line.
    entry_re = re.compile(
        r"(?P<repo>[\w._/]+)\s+(?P<tag>[\w.]+)\s+(?P<hash>\w+)?$")
    images: dict = {}

    with open(images_file) as f:
        for line in f:
            m = entry_re.match(line)
            if not m:
                continue

            m_repo = m.group('repo')
            m_tag = m.group('tag')
            m_hash = m.group('hash')

            tags = images.setdefault(m_repo, {})

            # An exact duplicate is harmless; only a disagreement is fatal.
            if m_tag in tags and m_hash != tags[m_tag]:
                raise ConfigurationError(
                    f"{m_repo}:{m_tag} has a conflicting hash: "
                    f"'{m_hash}' vs '{tags[m_tag]}' "
                    f"defined in '{images_file}'."
                )

            tags[m_tag] = m_hash

    return images


def authenticate(repository: str):
    """Obtain request headers that allow pulling from `repository`.

    An anonymous request to the registry root is expected to be rejected
    with 401.  The `Www-Authenticate` header of that response is parsed for
    the token endpoint (`realm`) and the `service` name, which are then used
    to request a token with pull scope for the repository.

    :param repository: repository name, e.g. `golemfactory/base`.
    :return: dict with the `Authorization` (bearer token) and `Accept`
        (manifest v2 media type) headers.
    :raises AuthenticationError: if the registry does not answer with 401,
        the auth header lacks a realm, the token endpoint does not answer
        with 200, or its response carries no usable token.
    """
    r = requests.get(DOCKERHUB_URI)
    if r.status_code != http_codes.UNAUTHORIZED:
        raise AuthenticationError(
            f"Unexpected status code: {r.status_code} "
            f"while retrieving: {DOCKERHUB_URI}"
        )

    # The header carries `key="value"` pairs; collect them into a dict.
    auth_properties = dict(
        re.findall(r"(\w+)=\"(.+?)\"", r.headers.get('Www-Authenticate', ''))
    )
    realm = auth_properties.get('realm')
    if not realm:
        raise AuthenticationError(
            f"Could not find expected auth header in: {r.headers}"
        )

    auth_r = requests.get(  # type:ignore
        realm,
        params={
            'service': auth_properties.get('service'),
            'scope': f'repository:{repository}:pull',
        }
    )
    if auth_r.status_code != http_codes.OK:
        raise AuthenticationError(
            f"Could not access: {realm}"
        )

    try:
        payload = auth_r.json()
    except ValueError as e:
        # ValueError is the common base of the decode errors raised by both
        # the stdlib `json` module and `simplejson`.
        raise AuthenticationError(
            f"Auth token not found in {auth_r.text}, retrieved from {realm}."
        ) from e

    # Valid JSON without a token must not turn into a "Bearer None" header.
    token = payload.get('token') if isinstance(payload, dict) else None
    if not token:
        raise AuthenticationError(
            f"Auth token not found in {auth_r.text}, retrieved from {realm}."
        )

    return {
        'Authorization': f'Bearer {token}',
        'Accept': 'application/vnd.docker.distribution.manifest.v2+json'
    }


def get_manifest(token: dict, repository: str, tag: str):
    """Fetch the manifest of `repository:tag` from the registry.

    :param token: request headers as returned by `authenticate`.
    :param repository: repository name, e.g. `golemfactory/base`.
    :param tag: image tag.
    :return: the decoded manifest, always a dict.
    :raises CommunicationError: if the response body is not valid JSON or
        does not decode to a dict.
    """
    r = requests.get(
        DOCKERHUB_URI + f'{repository}/manifests/{tag}',
        headers=token
    )
    try:
        manifest = r.json()
    except ValueError as e:
        # ValueError is the common base of the decode errors raised by both
        # the stdlib `json` module and `simplejson`.
        raise CommunicationError(
            f"Failed to retrieve the correct manifest for {repository}:{tag}, "
            f"got {r.status_code} - {r.text}"
        ) from e

    if not isinstance(manifest, dict):
        raise CommunicationError(
            f"Expected a dictionary, got {type(manifest)}: {manifest} "
            f"for {repository}:{tag}"
        )

    return manifest


def get_info(repository: str, tag: str):
    """Fetch the tag description of `repository:tag` from the registry.

    The request is sent without authentication headers.

    :param repository: repository name, e.g. `golemfactory/base`.
    :param tag: image tag.
    :return: the decoded tag description, always a dict.
    :raises CommunicationError: if the response body is not valid JSON or
        does not decode to a dict.
    """
    r = requests.get(DOCKERHUB_URI + f'repositories/{repository}/tags/{tag}/')
    try:
        info = r.json()
    except ValueError as e:
        # ValueError is the common base of the decode errors raised by both
        # the stdlib `json` module and `simplejson`.
        raise CommunicationError(
            f"Failed to retrieve image info for {repository}:{tag}, "
            f"got {r.status_code} - {r.text}"
        ) from e

    if not isinstance(info, dict):
        raise CommunicationError(
            f"Expected a dictionary, got {type(info)}: {info} "
            f"for {repository}:{tag}"
        )

    return info


def verify_images() -> typing.Tuple[int, int]:
    """Compare every registered image hash with the one the registry reports.

    One line is printed per image: a check mark on a match, or the expected
    and received hashes plus the tag's `last_updated` value on a mismatch.

    :return: `(number of images checked, number of mismatches)`.
    """
    total = 0
    mismatches = 0

    for repository, tags in get_images().items():
        # One token per repository, reused for all of its tags.
        auth_headers = authenticate(repository)

        for tag, expected in tags.items():
            total += 1

            config = get_manifest(auth_headers, repository, tag).get(
                'config', {})
            # Skip the first 7 characters of the digest, presumably its
            # 'sha256:' prefix, so it compares with the bare registry hash.
            received = config.get('digest', '')[7:]

            if expected == received:
                print(
                    f'{repository}:{tag}: {COLORS.GREEN}\u2713{COLORS.RESET}'
                )
                continue

            last_updated = get_info(repository, tag).get('last_updated')
            print(
                f'{repository}:{tag}: '
                f'{COLORS.RED}hash differs '
                f'(expected:{expected}, received:{received}).'
                f'{COLORS.RESET}'
                f' Last updated: {last_updated}'
            )
            mismatches += 1

    return total, mismatches


def verify_coverage():
    """Ensure every image used by Golem is present in the hash registry.

    :raises CoverageError: for the first repository/tag pair returned by
        `get_golem_images` that `get_images` does not list.
    """
    integrity_images = get_images()
    for repository, tag in get_golem_images().items():
        # A repository that is absent from the registry has no covered tags;
        # the empty default reports it as a CoverageError, not a TypeError.
        if tag not in integrity_images.get(repository, {}):
            raise CoverageError(
                f'{repository}:{tag} is not present in {IMAGES_FILE}')


def run_verification():
    """Verify all registered images, print a summary and exit the process.

    Exits with code 1 if at least one image hash differs, 0 otherwise.
    This function never returns normally.
    """
    cnt_images, cnt_failures = verify_images()

    if cnt_failures:
        print(
            f'{COLORS.RED}{cnt_failures} out of {cnt_images} images '
            f'had modified hashes!{COLORS.RESET}'
        )
        sys.exit(1)

    print(
        f'{COLORS.GREEN}All {cnt_images} images successfully verified :)'
        f'{COLORS.RESET}'
    )
    sys.exit(0)


def run():
    """Parse the command line and run the requested checks.

    With `--verify-coverage`, the coverage check runs first; the integrity
    verification always follows and terminates the process.
    """
    coverage_help = (
        f"Ensure all Golem images defined in {GOLEM_IMAGES_FILE} "
        f"are checked for integrity."
    )

    arg_parser = argparse.ArgumentParser(
        description="Verify integrity of Golem Docker hub images")
    arg_parser.add_argument(
        '--verify-coverage',
        action='store_true',
        help=coverage_help,
    )
    options = arg_parser.parse_args()

    if options.verify_coverage:
        print("Verifying coverage... ")
        verify_coverage()
        print(f"{COLORS.GREEN}All images protected :){COLORS.RESET}")

    print("Verifying Golem Docker image integrity...")
    run_verification()


# Script entry point. There is no `__main__` guard, so the checks also run
# whenever this module is imported.
run()