Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#438] Fix accessing unknown deployments #440

Merged
merged 13 commits into from
Sep 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ node {
sh '''
cd legion
../.venv/bin/pycodestyle --show-source --show-pep8 legion
../.venv/bin/pycodestyle --show-source --show-pep8 tests --ignore E402,E126,W503
../.venv/bin/pycodestyle --show-source --show-pep8 tests --ignore E402,E126,W503,E731
../.venv/bin/pydocstyle --source legion

export TERM="linux"
Expand Down
2 changes: 1 addition & 1 deletion legion/analyze_code.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

pycodestyle --show-source --show-pep8 legion
pycodestyle --show-source --show-pep8 tests --ignore E402,E126,W503
pycodestyle --show-source --show-pep8 tests --ignore E402,E126,W503,E731
pydocstyle --source legion
pylint legion
3 changes: 3 additions & 0 deletions legion/legion/k8s/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
EVENT_MODIFIED = 'MODIFIED'
EVENT_DELETED = 'DELETED'

LOAD_DATA_ITERATIONS = 5
LOAD_DATA_TIMEOUT = 2

ModelContainerMetaInformation = typing.NamedTuple('ModelContainerMetaInformation', [
('k8s_name', str),
('model_id', str),
Expand Down
48 changes: 31 additions & 17 deletions legion/legion/k8s/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@
from legion.k8s.definitions import ModelIdVersion
from legion.k8s.definitions import LEGION_COMPONENT_LABEL, LEGION_SYSTEM_LABEL, LEGION_API_SERVICE_PORT
from legion.k8s.definitions import STATUS_OK, STATUS_WARN, STATUS_FAIL
from legion.k8s.definitions import LOAD_DATA_ITERATIONS, LOAD_DATA_TIMEOUT
import legion.k8s.utils
from legion.utils import normalize_name
from legion.utils import normalize_name, ensure_function_succeed

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -272,6 +273,21 @@ def deployment(self):
self._load_deployment_data()
return self._deployment

def _load_deployment_data_logic(self):
"""
Logic (is called with retries) to load model service deployment

:return: bool
"""
client = legion.k8s.utils.build_client()

extension_api = kubernetes.client.ExtensionsV1beta1Api(client)

all_deployments = extension_api.list_namespaced_deployment(self._k8s_service.metadata.namespace)
return next((deployment for deployment in all_deployments.items
if deployment.metadata.labels.get(DOMAIN_MODEL_ID) == self.id
and deployment.metadata.labels.get(DOMAIN_MODEL_VERSION) == self.version), None)

def _load_deployment_data(self):
"""
Load deployment data (lazy loading)
Expand All @@ -281,15 +297,10 @@ def _load_deployment_data(self):
if self._deployment_data_loaded:
return

client = legion.k8s.utils.build_client()

extension_api = kubernetes.client.ExtensionsV1beta1Api(client)
all_deployments = extension_api.list_namespaced_deployment(self._k8s_service.metadata.namespace)
model_deployments = [deployment for deployment in all_deployments.items
if deployment.metadata.labels.get(DOMAIN_MODEL_ID) == self.id
and deployment.metadata.labels.get(DOMAIN_MODEL_VERSION) == self.version]

self._deployment = model_deployments[0] if model_deployments else None
self._deployment = ensure_function_succeed(self._load_deployment_data_logic,
LOAD_DATA_ITERATIONS, LOAD_DATA_TIMEOUT)
if not self._deployment:
raise Exception('Failed to load deployment for {!r}'.format(self))

self._deployment_data_loaded = True

Expand All @@ -310,7 +321,10 @@ def scale(self):
:return: int -- current model scale
"""
self._load_deployment_data()
return self.deployment.status.available_replicas if self.deployment.status.available_replicas else 0
if self.deployment.status.available_replicas:
return self.deployment.status.available_replicas
else:
return 0

@scale.setter
def scale(self, new_scale):
Expand All @@ -329,15 +343,15 @@ def scale(self, new_scale):

extension_api = kubernetes.client.ExtensionsV1beta1Api(client)

old_scale = self._deployment.spec.replicas
self._deployment.spec.replicas = new_scale
old_scale = self.deployment.spec.replicas
self.deployment.spec.replicas = new_scale

LOGGER.info('Scaling service {} in namespace {} from {} to {} replicas'
.format(self._deployment.metadata.name, self._deployment.metadata.namespace, old_scale, new_scale))
.format(self.deployment.metadata.name, self.deployment.metadata.namespace, old_scale, new_scale))

extension_api.patch_namespaced_deployment(self._deployment.metadata.name,
self._deployment.metadata.namespace,
self._deployment)
extension_api.patch_namespaced_deployment(self.deployment.metadata.name,
self.deployment.metadata.namespace,
self.deployment)

self.reload_cache()

Expand Down
51 changes: 51 additions & 0 deletions legion/legion/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@
import getpass
import distutils.dir_util
import re
import logging
import shutil
import socket
import subprocess
import sys
import time
import tempfile
import zipfile
import inspect

import legion.config
import legion.containers.headers
Expand All @@ -39,6 +42,7 @@


KUBERNETES_STRING_LENGTH_LIMIT = 63
LOGGER = logging.getLogger(__name__)


def render_template(template_name, values=None):
Expand Down Expand Up @@ -588,3 +592,50 @@ def deduce_model_file_name(model_id, model_version):
return os.path.join(default_prefix, file_name)

return file_name


def get_function_description(callable_object):
"""
Gather information about callable object to string

:param callable_object: callable object to analyze
:type callable_object: Callable[[], any]
:return: str -- object description
"""
object_class_name = callable_object.__class__.__name__
if not callable(callable_object):
return '<not callable object: {}>'.format(object_class_name)

object_name = callable_object.__name__
module_name = inspect.getmodule(callable_object)
return '<{} {} in {}>'.format(object_class_name, object_name, module_name)


def ensure_function_succeed(function_to_call, retries, timeout):
"""
Try to call function till it will return not None object.
Raise if there are no retries left

:param function_to_call: function to be called
:type function_to_call: Callable[[], any]
:param retries: count of retries
:type retries: int
:param timeout: timeout between retries
:type timeout: int
:return: Any -- result of successful function call or None if no retries left
"""
function_description = get_function_description(function_to_call)
for no in range(retries):
LOGGER.debug('Calling {}'.format(function_description))
result = function_to_call()
if result is not None:
return result

if no < retries:
LOGGER.debug('Retry {}/{} was failed'.format(no + 1, retries))
if no < retries - 1:
LOGGER.debug('Waiting {}s before next retry analysis'.format(timeout))
time.sleep(timeout)

LOGGER.error('No retries left for function {}'.format(function_description))
return None
68 changes: 68 additions & 0 deletions legion/tests/test_utils_other.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#
# Copyright 2017 EPAM Systems
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import legion.utils as utils

import unittest2


class TestUtilsOther(unittest2.TestCase):
def test_lambda_analyzing(self):
lamb = lambda x: x**2
description = utils.get_function_description(lamb)
self.assertIn('function <lambda>', description)
self.assertIn(__file__, description)

def test_function_analyzing(self):
def tmp():
return 42
description = utils.get_function_description(tmp)
self.assertIn('function tmp', description)
self.assertIn(__file__, description)

def test_non_callable_analyzing(self):
tmp = 42
description = utils.get_function_description(tmp)
self.assertIn('not callable object', description)

def test_ensure_retries_positive(self):
counter = 0

def func():
nonlocal counter
if counter > 1:
return 42
counter += 1

result = utils.ensure_function_succeed(func, 3, 1)
self.assertEqual(result, 42)
self.assertEqual(counter, 2)

def test_ensure_retries_negative(self):
counter = 0

def func():
nonlocal counter
if counter > 1:
return 42
counter += 1

result = utils.ensure_function_succeed(func, 2, 1)
self.assertEqual(result, None)
self.assertEqual(counter, 2)


if __name__ == '__main__':
unittest2.main()