Skip to content
This repository has been archived by the owner on May 6, 2020. It is now read-only.

Commit

Permalink
Handle cases with namespace quotas limits sets (#1182)
Browse files Browse the repository at this point in the history
* feat(resourcequota): Handle cases with namespace quotas limits sets

Handle 2 cases:
* User did not set quota for applications or set it incorrectly
* User tries to scale the application when limits are already exceeded (overuse)

* Don't raise RuntimeError if no events in namespace

* Add test for _handle_not_ready_pods function

* Fix indents required by flake8

* Use replicaset events for handle quota exceed cases

* Don't try to wait for pods started if we have failed events in ReplicaSet
  • Loading branch information
rvadim authored and Matthew Fisher committed Feb 8, 2017
1 parent 8c8b5ab commit 9c2d584
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 4 deletions.
2 changes: 1 addition & 1 deletion rootfs/scheduler/mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ def filter_data(filters, path):
continue

# check if item has labels
if 'labels' not in item['metadata']:
if 'labels' not in item['metadata'] and item['kind'] != 'Event':
continue

# Do extra filtering based on labelSelector
Expand Down
42 changes: 42 additions & 0 deletions rootfs/scheduler/resources/deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,12 @@ def in_progress(self, namespace, name, timeout, batches, replicas, tags):
self.log(namespace, 'Deploy operation for Deployment {} in has expired. Rolling back to last good known release'.format(name), level='DEBUG') # noqa
return False, True

try:
self._check_for_failed_events(namespace, labels=labels)
except KubeException as e:
self.log(namespace, e)
return False, True

return True, False

def are_replicas_ready(self, namespace, name):
Expand Down Expand Up @@ -326,6 +332,9 @@ def wait_until_ready(self, namespace, name, **kwargs):
timeout = len(batches) * deploy_timeout
self.log(namespace, 'This deployments overall timeout is {}s - batch timout is {}s and there are {} batches to deploy with a total of {} pods'.format(timeout, deploy_timeout, len(batches), replicas)) # noqa

# check for failed events(when quota exceeded for example)
self._check_for_failed_events(namespace, labels=labels)

waited = 0
while waited < timeout:
ready, availablePods = self.are_replicas_ready(namespace, name)
Expand All @@ -352,6 +361,39 @@ def wait_until_ready(self, namespace, name, **kwargs):
if not ready:
self.pod._handle_not_ready_pods(namespace, labels)

def _check_for_failed_events(self, namespace, labels):
    """
    Fetch the Deployment's ReplicaSet (selected by labels) and inspect the
    events involving that ReplicaSet for creation failures.

    Args:
        namespace: namespace the Deployment lives in
        labels: label selector used to look up the Deployment's ReplicaSet

    Raises:
        KubeException: when the ReplicaSet has at least one event with
            reason 'FailedCreate' (for example when a namespace resource
            quota is exceeded and pods cannot be created)
    """
    response = self.rs.get(namespace, labels=labels)
    replicasets = response.json().get('items') or []
    if not replicasets:
        # No ReplicaSet yet (e.g. controller has not caught up) -
        # nothing to inspect, so nothing can have failed
        return

    # Narrow the event query to the one ReplicaSet instance via its
    # name and uid so unrelated events in the namespace are excluded
    metadata = replicasets[0]['metadata']
    fields = {
        'involvedObject.kind': 'ReplicaSet',
        'involvedObject.name': metadata['name'],
        'involvedObject.namespace': namespace,
        'involvedObject.uid': metadata['uid'],
    }
    events = self.ns.events(namespace, fields=fields).json().get('items') or []
    if any(event['reason'] == 'FailedCreate' for event in events):
        # Log and surface every event message for context, not just
        # the one that triggered the failure
        log = self._get_formatted_messages(events)
        self.log(namespace, log)
        raise KubeException(log)

@staticmethod
def _get_formatted_messages(events):
"""
Format each event by string and join all events to one string
"""
message_format = 'Message:{message}, lastTimestamp:{lastTimestamp}, reason: {reason}, count: {count}' # noqa
output = []
for event in events:
output.append(message_format.format(**event))
return '\n'.join(output)

def _get_deploy_steps(self, batches, tags):
# if there is no batch information available default to available nodes for app
if not batches:
Expand Down
44 changes: 44 additions & 0 deletions rootfs/scheduler/resources/events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from scheduler.exceptions import KubeHTTPException
from scheduler.resources import Resource
from datetime import datetime
import uuid

DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'


class Events(Resource):
    """
    Events resource.
    Warning! Used ONLY for testing purposes - lets tests inject Event
    objects into the scheduler mock.
    """
    short_name = 'ev'

    def create(self, namespace, name, message, **kwargs):
        """
        Create an Event in the given namespace.

        Args:
            namespace: namespace to create the Event in
            name: Event object name
            message: human-readable event message
            **kwargs: optional 'count', 'resourceVersion', 'type',
                'reason', 'component' and 'involvedObject' fields

        Returns:
            the HTTP response of the POST

        Raises:
            KubeHTTPException: if the API did not answer 201 Created
        """
        url = self.api('/namespaces/{}/events'.format(namespace))
        # Compute the timestamp once so creationTimestamp, firstTimestamp
        # and lastTimestamp agree; use UTC to match the trailing 'Z'
        # (Zulu time) in DATETIME_FORMAT
        now = datetime.utcnow().strftime(DATETIME_FORMAT)
        data = {
            'kind': 'Event',
            'apiVersion': 'v1',
            'count': kwargs.get('count', 1),
            'metadata': {
                'creationTimestamp': now,
                'namespace': namespace,
                'name': name,
                'resourceVersion': kwargs.get('resourceVersion', ''),
                'uid': str(uuid.uuid4()),
            },
            'message': message,
            'type': kwargs.get('type', 'Normal'),
            'firstTimestamp': now,
            'lastTimestamp': now,
            'reason': kwargs.get('reason', ''),
            'source': {
                'component': kwargs.get('component', ''),
            },
            'involvedObject': kwargs.get('involvedObject', {})
        }

        response = self.http_post(url, json=data)
        if response.status_code != 201:
            raise KubeHTTPException(response, 'create Event for namespace {}'.format(namespace))  # noqa

        return response
2 changes: 0 additions & 2 deletions rootfs/scheduler/resources/pod.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,8 +719,6 @@ def _handle_not_ready_pods(self, namespace, labels):
message = "\n".join([x.strip() for x in event['message'].split("\n")])
raise KubeException(message)

return None

def deploy_probe_timeout(self, timeout, namespace, labels, containers):
"""
Added in additional timeouts based on readiness and liveness probe
Expand Down
25 changes: 24 additions & 1 deletion rootfs/scheduler/tests/test_deployments.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Run the tests with './manage.py test scheduler'
"""
from scheduler import KubeHTTPException
from scheduler import KubeHTTPException, KubeException
from scheduler.tests import TestCase
from scheduler.utils import generate_random_name

Expand Down Expand Up @@ -240,3 +240,26 @@ def test_get_deployment_replicaset(self):
data['metadata']['labels'],
data
)

def test_check_for_failed_events(self):
    """
    A 'FailedCreate' event attached to a Deployment's ReplicaSet should
    make _check_for_failed_events raise KubeException carrying the
    event message.
    """
    deploy_name = self.create(self.namespace)
    deployment = self.scheduler.deployment.get(self.namespace, deploy_name).json()
    labels = deployment['metadata']['labels']
    response = self.scheduler.rs.get(self.namespace, labels=labels)
    rs = response.json()
    # Target the Deployment's ReplicaSet so the field selector used by
    # _check_for_failed_events matches this event
    involved_object = {
        'involvedObject.kind': 'ReplicaSet',
        'involvedObject.name': rs['items'][0]['metadata']['name'],
        'involvedObject.namespace': self.namespace,
        'involvedObject.uid': rs['items'][0]['metadata']['uid'],
    }
    message = 'Quota exceeded'
    # Events.create reads the 'involvedObject' kwarg (the previous
    # 'involved_object' spelling was silently ignored)
    self.scheduler.ev.create(self.namespace,
                             '{}'.format(generate_random_name()),
                             message,
                             type='Warning',
                             involvedObject=involved_object,
                             reason='FailedCreate')
    with self.assertRaisesRegex(KubeException,
                                'Message:{}.*'.format(message)):
        self.scheduler.deployment._check_for_failed_events(self.namespace,
                                                           labels=labels)  # noqa

0 comments on commit 9c2d584

Please sign in to comment.