Skip to content
This repository has been archived by the owner on Jun 20, 2024. It is now read-only.

Extend scheduler to periodically garbage collect stranded VMs #1227

Merged
merged 2 commits into from
Jul 28, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions test/scheduler/.gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1 @@
*.egg-info
*.dist-info
flask
jinja2
markupsafe
werkzeug
itsdangerous.*
lib
2 changes: 1 addition & 1 deletion test/scheduler/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
To upload a newer version:

```
pip install -r requirements.txt -t .
pip install -r requirements.txt -t lib
appcfg.py update .
```
1 change: 0 additions & 1 deletion test/scheduler/app.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,3 @@ handlers:
libraries:
- name: webapp2
version: latest

3 changes: 3 additions & 0 deletions test/scheduler/appengine_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from google.appengine.ext import vendor

# Make third-party packages installed into the 'lib' directory importable.
vendor.add('lib')
4 changes: 4 additions & 0 deletions test/scheduler/cron.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Requests the scheduler's /tasks/gc URL on a fixed schedule.
cron:
- description: periodic gc
url: /tasks/gc
schedule: every 5 minutes
46 changes: 46 additions & 0 deletions test/scheduler/main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
import collections
import json
import logging
import operator
import re

import flask
from oauth2client.client import GoogleCredentials
from googleapiclient import discovery

from google.appengine.api import urlfetch
from google.appengine.ext import ndb

app = flask.Flask('scheduler')
Expand All @@ -12,6 +19,9 @@
# observations faster.
alpha = 0.3

# GCE project and zone whose instances the /tasks/gc handler lists and deletes.
PROJECT = 'positive-cocoa-90213'
ZONE = 'us-central1-a'

class Test(ndb.Model):
total_run_time = ndb.FloatProperty(default=0.) # Not total, but a EWMA
total_runs = ndb.IntegerProperty(default=0)
Expand Down Expand Up @@ -64,3 +74,39 @@ def avg(test):
# atomically insert or retrieve existing schedule
schedule = Schedule.get_or_insert(schedule_id, shards=shards)
return flask.json.jsonify(tests=schedule.shards[str(shard)])

# VM names look like "host<index>-<build>-<shard>"; the build number is what
# ties a VM back to a CI build.
NAME_RE = re.compile(r'^host(?P<index>\d+)-(?P<build>\d+)-(?P<shard>\d+)$')


@app.route('/tasks/gc')
def gc():
    """Delete VMs whose CI build is no longer running.

    Lists the instances in PROJECT/ZONE, groups those whose name matches
    NAME_RE by build number, fetches the project's builds from CircleCI and
    deletes every matching VM whose build is not reported as 'running'.
    Instances whose name does not match NAME_RE are never touched.

    Returns:
        A (response, 200) tuple whose JSON body has two keys: ``running``
        (build numbers currently running) and ``stopped`` (names of the VMs
        for which a delete was issued).

    Raises:
        RuntimeError: if the CircleCI request does not return HTTP 200.
    """
    # Get list of VMs, pick build id out of VM name
    credentials = GoogleCredentials.get_application_default()
    compute = discovery.build('compute', 'v1', credentials=credentials)
    host_by_build = collections.defaultdict(list)
    request = compute.instances().list(project=PROJECT, zone=ZONE)
    # Follow pagination so VMs beyond the first page are not missed.
    while request is not None:
        response = request.execute()
        # 'items' is absent from the response when there are no instances.
        for instance in response.get('items', []):
            matches = NAME_RE.match(instance['name'])
            if matches is None:
                continue
            host_by_build[int(matches.group('build'))].append(instance['name'])
        request = compute.instances().list_next(
            previous_request=request, previous_response=response)
    logging.info("Running VMs by build: %r", host_by_build)

    # Get list of builds, filter down to running builds
    # NOTE(review): only the builds returned by this single request are
    # considered; confirm the default page size always covers every build
    # that may still be running, otherwise live VMs could be deleted.
    result = urlfetch.fetch('https://circleci.com/api/v1/project/weaveworks/weave',
                            headers={'Accept': 'application/json'})
    # Fail loudly (not via assert, which -O strips) so that we never delete
    # VMs based on an error response.
    if result.status_code != 200:
        raise RuntimeError(
            "CircleCI build list request failed with status %d" % result.status_code)
    builds = json.loads(result.content)
    running = {build['build_num'] for build in builds if build['status'] == 'running'}
    logging.info("Runnings builds: %r", running)

    # Stop VMs for builds that aren't running
    stopped = []
    for build, names in host_by_build.items():
        if build in running:
            continue
        for name in names:
            stopped.append(name)
            logging.info("Stopping VM %s", name)
            compute.instances().delete(project=PROJECT, zone=ZONE, instance=name).execute()

    return (flask.json.jsonify(running=list(running), stopped=stopped), 200)
1 change: 1 addition & 0 deletions test/scheduler/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
flask
google-api-python-client