Commit 42fe126: K8s fetch secrets (#93)
Inal Djafar authored Jan 16, 2020
1 parent 6bbbc54 commit 42fe126
Showing 9 changed files with 74 additions and 11 deletions.
2 changes: 1 addition & 1 deletion backend/backend/settings/common.py
@@ -186,6 +186,7 @@ def to_bool(value):
'CAPTURE_LOGS': to_bool(os.environ.get('TASK_CAPTURE_LOGS', True)),
'CLEAN_EXECUTION_ENVIRONMENT': to_bool(os.environ.get('TASK_CLEAN_EXECUTION_ENVIRONMENT', True)),
'CACHE_DOCKER_IMAGES': to_bool(os.environ.get('TASK_CACHE_DOCKER_IMAGES', False)),
'CHAINKEYS_ENABLED': to_bool(os.environ.get('TASK_CHAINKEYS_ENABLED', False)),
}

CELERY_RESULT_BACKEND = 'django-db'
@@ -201,4 +202,3 @@ def to_bool(value):
DATA_UPLOAD_MAX_NUMBER_FIELDS = 10000

EXPIRY_TOKEN_LIFETIME = timedelta(minutes=int(os.environ.get('EXPIRY_TOKEN_LIFETIME', 24*60)))

1 change: 1 addition & 0 deletions backend/requirements.txt
@@ -16,6 +16,7 @@ GPUtil == 1.4.0
hkdf >= 0.0.3
ipython==6.4.0
ipython-genutils==0.2.0
kubernetes==10.0.1
mock==2.0.0
psycopg2-binary==2.7.4
protobuf == 3.6.0
56 changes: 50 additions & 6 deletions backend/substrapp/tasks/tasks.py
@@ -1,5 +1,6 @@
from __future__ import absolute_import, unicode_literals

from base64 import b64decode
import os
import shutil
import tempfile
@@ -9,6 +10,7 @@
import logging

import docker
from kubernetes.client.rest import ApiException
from checksumdir import dirhash
from django.core.exceptions import ObjectDoesNotExist
from django.conf import settings
@@ -21,10 +23,9 @@
from substrapp.utils import get_hash, get_owner, create_directory, uncompress_content
from substrapp.ledger_utils import (log_start_tuple, log_success_tuple, log_fail_tuple,
query_tuples, LedgerError, LedgerStatusError, get_object_from_ledger)
from substrapp.tasks.utils import ResourcesManager, compute_docker, get_asset_content, list_files
from substrapp.tasks.utils import ResourcesManager, compute_docker, get_asset_content, list_files, get_k8s_client
from substrapp.tasks.exception_handler import compute_error_code


PREFIX_HEAD_FILENAME = 'head_'
PREFIX_TRUNK_FILENAME = 'trunk_'

@@ -598,6 +599,7 @@ def _do_task(client, subtuple_directory, tuple_type, subtuple, compute_plan_id,
output_trunk_model_filename = 'trunk_model'

remove_image = not((compute_plan_id is not None and rank != -1) or settings.TASK['CACHE_DOCKER_IMAGES'])
environment = {}

# VOLUMES

@@ -625,6 +627,41 @@ def _do_task(client, subtuple_directory, tuple_type, subtuple, compute_plan_id,
client.volumes.create(name=volume_id)
model_volume[volume_id] = {'bind': '/sandbox/local', 'mode': 'rw'}

if compute_plan_id is not None and settings.TASK['CHAINKEYS_ENABLED']:
chainkeys_directory = get_chainkeys_directory(compute_plan_id)

if not os.path.exists(chainkeys_directory):
secret_namespace = os.getenv('K8S_SECRET_NAMESPACE', 'default')
label_selector = f"compute_plan={subtuple.get('tag')}"

k8s_client = get_k8s_client()
try:
secrets = k8s_client.list_namespaced_secret(secret_namespace, label_selector=label_selector)
except ApiException as e:
logging.error(f'failed to fetch secrets from namespace {secret_namespace} with selector {label_selector}')
raise e

secrets = {s['metadata']['name']: int.from_bytes(b64decode(s['data']['key']), 'big')
for s in secrets.to_dict()['items']}

os.makedirs(chainkeys_directory)
with open(path.join(chainkeys_directory, 'chainkeys.json'), 'w') as f:
json.dump(secrets, f)

for secret_name in secrets.keys():
try:
k8s_client.delete_namespaced_secret(secret_name, secret_namespace)
except ApiException as e:
logging.error(f'failed to delete secrets from namespace {secret_namespace}')
raise e

volumes[chainkeys_directory] = {'bind': '/sandbox/chainkeys', 'mode': 'rw'}

# Environment current node index
node_index = os.getenv('NODE_INDEX')
if node_index:
environment["NODE_INDEX"] = node_index

# generate command
if tuple_type == TRAINTUPLE_TYPE:
command = 'train'
@@ -693,7 +730,8 @@ def _do_task(client, subtuple_directory, tuple_type, subtuple, compute_plan_id,
command=command,
remove_image=remove_image,
remove_container=settings.TASK['CLEAN_EXECUTION_ENVIRONMENT'],
capture_logs=settings.TASK['CAPTURE_LOGS']
capture_logs=settings.TASK['CAPTURE_LOGS'],
environment=environment
)

# save model in database
@@ -742,7 +780,8 @@ def _do_task(client, subtuple_directory, tuple_type, subtuple, compute_plan_id,
command=None,
remove_image=remove_image,
remove_container=settings.TASK['CLEAN_EXECUTION_ENVIRONMENT'],
capture_logs=settings.TASK['CAPTURE_LOGS']
capture_logs=settings.TASK['CAPTURE_LOGS'],
environment=environment
)

# load performance
@@ -766,10 +805,15 @@ def save_model(subtuple_directory, subtuple_key, filename='model'):
return end_model_file, end_model_file_hash


def get_volume_id(compute_plan_id):
def get_volume_id(compute_plan_id, prefix='local'):
org_name = getattr(settings, 'ORG_NAME')
return f'local-{compute_plan_id}-{org_name}'
return f'{prefix}-{compute_plan_id}-{org_name}'


def get_subtuple_directory(subtuple):
return path.join(getattr(settings, 'MEDIA_ROOT'), 'subtuple', subtuple['key'])


def get_chainkeys_directory(compute_plan_id):
return path.join(getattr(settings, 'MEDIA_ROOT'), getattr(settings, 'ORG_NAME'), 'computeplan',
compute_plan_id, 'chainkeys')
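
Editor's note: the secret-fetching block above assumes each chain key is stored as a Kubernetes secret labelled with the compute plan tag and carrying a base64-encoded 'key' entry. A minimal, hypothetical producer-side sketch (the helper name and creation flow are assumptions, not part of this commit) could look like:

import base64
from kubernetes import client, config

def create_chainkey_secret(namespace, compute_plan_tag, secret_name, key_int):
    # Hypothetical sketch: create a secret that _do_task can discover via its
    # compute_plan label selector and decode with int.from_bytes(..., 'big').
    config.load_incluster_config()  # assumes this runs inside the cluster
    api = client.CoreV1Api()
    body = client.V1Secret(
        metadata=client.V1ObjectMeta(
            name=secret_name,
            labels={'compute_plan': compute_plan_tag},
        ),
        # Secret data values must be base64-encoded strings.
        data={'key': base64.b64encode(
            key_int.to_bytes((key_int.bit_length() + 7) // 8 or 1, 'big')).decode()},
    )
    api.create_namespaced_secret(namespace, body)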
13 changes: 10 additions & 3 deletions backend/substrapp/tasks/utils.py
@@ -9,6 +9,7 @@
from requests.auth import HTTPBasicAuth
from substrapp.utils import get_owner, get_remote_file_content, NodeError

from kubernetes import client, config

DOCKER_LABEL = 'substra_task'

@@ -197,7 +198,7 @@ def list_files(startpath):


def compute_docker(client, resources_manager, dockerfile_path, image_name, container_name, volumes, command,
remove_image=True, remove_container=True, capture_logs=True):
environment, remove_image=True, remove_container=True, capture_logs=True):

dockerfile_fullpath = os.path.join(dockerfile_path, 'Dockerfile')
if not os.path.exists(dockerfile_fullpath):
@@ -236,11 +237,12 @@ def compute_docker(client, resources_manager, dockerfile_path, image_name, conta
'network_disabled': True,
'network_mode': 'none',
'privileged': False,
'cap_drop': ['ALL']
'cap_drop': ['ALL'],
'environment': environment
}

if gpu_set is not None:
task_args['environment'] = {'NVIDIA_VISIBLE_DEVICES': gpu_set}
task_args['environment'].update({'NVIDIA_VISIBLE_DEVICES': gpu_set})
task_args['runtime'] = 'nvidia'

try:
Expand Down Expand Up @@ -339,3 +341,8 @@ def get_cpu_gpu_sets(cls):
gpu_set = gpu_sets_available.pop()

return cpu_set, gpu_set


def get_k8s_client():
config.load_incluster_config()
return client.CoreV1Api()
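
Editor's note: get_k8s_client relies on config.load_incluster_config(), which only works when the process runs inside a Kubernetes pod with a service account token mounted. A hedged sketch of a development-time fallback (not part of this commit) might be:

from kubernetes import client, config
from kubernetes.config.config_exception import ConfigException

def get_k8s_client_with_fallback():
    # Hypothetical variant: prefer the in-cluster configuration, but fall back
    # to the local kubeconfig (~/.kube/config) when running outside a cluster.
    try:
        config.load_incluster_config()
    except ConfigException:
        config.load_kube_config()
    return client.CoreV1Api()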
2 changes: 1 addition & 1 deletion backend/substrapp/tests/tests_tasks.py
@@ -337,7 +337,7 @@ def test_compute_docker(self):
hash_docker = uuid.uuid4().hex
compute_docker(client, self.ResourcesManager,
self.subtuple_path, 'test_compute_docker_' + hash_docker,
'test_compute_docker_name_' + hash_docker, None, None)
'test_compute_docker_name_' + hash_docker, None, None, environment={})

self.assertIsNone(cpu_set)
self.assertIsNone(gpu_set)
3 changes: 3 additions & 0 deletions charts/substra-backend/templates/deployment-server.yaml
@@ -65,6 +65,9 @@ spec:
value: {{ .Values.persistence.hostPath }}/medias/
- name: PYTHONUNBUFFERED
value: "1"
{{- with .Values.extraEnv }}
{{ toYaml . | indent 10 }}
{{- end }}
ports:
- name: http
containerPort: {{ .Values.backend.service.port }}
3 changes: 3 additions & 0 deletions charts/substra-backend/templates/deployment-worker.yaml
@@ -57,6 +57,9 @@ spec:
value: {{ .Values.persistence.hostPath }}/medias/
- name: PYTHONUNBUFFERED
value: "1"
{{- with .Values.extraEnv }}
{{ toYaml . | indent 10 }}
{{- end }}
volumeMounts:
- name: dockersocket
mountPath: /var/run/docker.sock
4 changes: 4 additions & 0 deletions charts/substra-backend/values.yaml
@@ -197,3 +197,7 @@ celeryworker:
tolerations: []

affinity: {}

extraEnv: []
# - name: ENV_VARIABLE
# value: false
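
Editor's note: the new extraEnv hook is how deployment-specific variables such as TASK_CHAINKEYS_ENABLED and K8S_SECRET_NAMESPACE (both read by the backend code above) reach the server and worker containers. An illustrative override (the values shown are assumptions, not defaults of this chart) could be:

extraEnv:
  - name: TASK_CHAINKEYS_ENABLED
    value: "true"
  - name: K8S_SECRET_NAMESPACE
    value: "org-1"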
1 change: 1 addition & 0 deletions docker/start.py
@@ -158,6 +158,7 @@ def generate_docker_compose_file(conf, launch_settings):
f"TASK_CAPTURE_LOGS=True",
f"TASK_CLEAN_EXECUTION_ENVIRONMENT=True",
f"TASK_CACHE_DOCKER_IMAGES=False",
f"TASK_CHAINKEYS_ENABLED=False",

f'CELERY_BROKER_URL={CELERY_BROKER_URL}',
]
