
Commit

Merge commit '3f0b0e13653383eb630e6482a9b3d8c29891c9bf' into 587-parallelise
tomwilkie committed Nov 4, 2015
2 parents 1ad7b4f + 3f0b0e1 commit 23c253f
Showing 12 changed files with 211 additions and 4 deletions.
2 changes: 2 additions & 0 deletions tools/README.md
@@ -17,6 +17,8 @@ Included in this repo are tools shared by weave.git and scope.git. They include
suffixed with the number of hosts it requires, and the hosts available are
contained in the environment variable HOSTS, the tool will run tests in
parallel, on different hosts.
- ```scheduler```: an appengine application that can be used to distribute
tests across different shards in CircleCI.

## Using build-tools.git

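The host-count convention described in the README hunk above can be illustrated with a small, purely hypothetical example (the test names are not from this repo):

```
# Hypothetical test names: the trailing number is the host count the test needs.
#   100_router_2_test.sh  -> needs 2 hosts
#   200_dns_3_test.sh     -> needs 3 hosts
# The runner takes the available hosts from HOSTS and farms the tests
# out across them in parallel.
export HOSTS="host1 host2 host3"
```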
21 changes: 21 additions & 0 deletions tools/cover/gather_coverage.sh
@@ -0,0 +1,21 @@
#!/bin/bash
# This script copies all the coverage reports from the various circle shards,
# merges them and produces a complete report.

set -ex
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DESTINATION=$1
FROMDIR=$2
mkdir -p $DESTINATION

if [ -n "$CIRCLECI" ]; then
  for i in $(seq 1 $(($CIRCLE_NODE_TOTAL - 1))); do
    scp node$i:$FROMDIR/* $DESTINATION || true
  done
fi

go get github.com/weaveworks/build-tools/cover
cover $DESTINATION/* >profile.cov
go tool cover -html=profile.cov -o coverage.html
go tool cover -func=profile.cov -o coverage.txt
tar czf coverage.tar.gz $DESTINATION
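A plausible invocation of the script above, with illustrative directory names (the real CI config is not part of this commit):

```
# Pull per-shard coverage profiles from ./coverage on each CircleCI node,
# merge them with the cover tool, and leave profile.cov, coverage.html,
# coverage.txt and coverage.tar.gz in the working directory.
./tools/cover/gather_coverage.sh ./merged-coverage ./coverage
```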
6 changes: 3 additions & 3 deletions tools/rebuild-image
Expand Up @@ -17,12 +17,12 @@ rebuild() {
    mkdir -p $CACHEDIR
    rm $CACHEDIR/$SAVEDNAME* || true
    docker build -t $IMAGENAME $IMAGEDIR
    docker save $IMAGENAME:latest > $CACHEDIR/$SAVEDNAME-$CIRCLE_SHA1
    docker save $IMAGENAME:latest | gzip - > $CACHEDIR/$SAVEDNAME-$CIRCLE_SHA1.gz
}

# Get the revision the cached image was built at
cached_image_rev() {
    find $CACHEDIR -name "$SAVEDNAME-*" -type f | sed 's/[^\-]*\-//'
    find $CACHEDIR -name "$SAVEDNAME-*" -type f | sed -n 's/^[^\-]*\-\([a-z0-9]*\).gz$/\1/p'
}

# Have there been any revisions between $1 and $2
@@ -60,4 +60,4 @@ fi

# we didn't rebuild; import cached version
echo ">>> No changes found, importing cached image"
docker load -i $CACHEDIR/$SAVEDNAME-$cached_revision
zcat $CACHEDIR/$SAVEDNAME-$cached_revision.gz | docker load
7 changes: 6 additions & 1 deletion tools/runner/runner.go
@@ -138,11 +138,16 @@ func updateScheduler(test string, duration float64) {

func getSchedule(tests []string) ([]string, error) {
    var (
        testRun     = "integration-" + os.Getenv("CIRCLE_BUILD_NUM")
        prefix      = os.Getenv("SCHEDULER_PREFIX")
        buildNum    = os.Getenv("CIRCLE_BUILD_NUM")
        shardCount  = os.Getenv("CIRCLE_NODE_TOTAL")
        shardID     = os.Getenv("CIRCLE_NODE_INDEX")
        requestBody = &bytes.Buffer{}
        testRun     = "integration-" + buildNum
    )
    if prefix != "" {
        testRun = prefix + "-" + buildNum
    }
    if err := json.NewEncoder(requestBody).Encode(schedule{tests}); err != nil {
        return []string{}, err
    }
38 changes: 38 additions & 0 deletions tools/sched
@@ -0,0 +1,38 @@
#!/usr/bin/python
import sys, string, json, urllib
import requests

BASE_URL="http://positive-cocoa-90213.appspot.com"

def test_time(test_name, runtime):
    r = requests.post(BASE_URL + "/record/%s/%f" % (urllib.quote(test_name, safe=""), runtime))
    print r.text
    assert r.status_code == 204

def test_sched(test_run, shard_count, shard_id):
    tests = json.dumps({'tests': string.split(sys.stdin.read())})
    r = requests.post(BASE_URL + "/schedule/%s/%d/%d" % (test_run, shard_count, shard_id), data=tests)
    assert r.status_code == 200
    result = r.json()
    for test in sorted(result['tests']):
        print test

def usage():
    print "%s <cmd> <args..>" % sys.argv[0]
    print "  time <test name> <run time>"
    print "  sched <test run> <num shards> <shard id>"

def main():
    if len(sys.argv) < 4:
        usage()
        sys.exit(1)

    if sys.argv[1] == "time":
        test_time(sys.argv[2], float(sys.argv[3]))
    elif sys.argv[1] == "sched":
        test_sched(sys.argv[2], int(sys.argv[3]), int(sys.argv[4]))
    else:
        usage()

if __name__ == '__main__':
    main()
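A sketch of how this client might be driven from one CircleCI shard; the test layout and timing loop are assumptions, only the `sched` sub-commands come from the script above:

```
# Ask the scheduler which tests this shard should run, run them, then report
# each test's runtime back so future schedules are better balanced.
ls *_test.sh | ./tools/sched sched "integration-$CIRCLE_BUILD_NUM" "$CIRCLE_NODE_TOTAL" "$CIRCLE_NODE_INDEX" |
while read -r test; do
    start=$(date +%s)
    "./$test"
    ./tools/sched time "$test" $(($(date +%s) - start))
done
```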
1 change: 1 addition & 0 deletions tools/scheduler/.gitignore
@@ -0,0 +1 @@
lib
6 changes: 6 additions & 0 deletions tools/scheduler/README.md
@@ -0,0 +1,6 @@
To upload a newer version:

```
pip install -r requirements.txt -t lib
appcfg.py update .
```
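For local testing before an upload, the standard App Engine dev server can usually be pointed at this directory (a sketch, not taken from this repo):

```
pip install -r requirements.txt -t lib
dev_appserver.py .
```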
13 changes: 13 additions & 0 deletions tools/scheduler/app.yaml
@@ -0,0 +1,13 @@
application: positive-cocoa-90213
version: 1
runtime: python27
api_version: 1
threadsafe: true

handlers:
- url: .*
  script: main.app

libraries:
- name: webapp2
  version: latest
3 changes: 3 additions & 0 deletions tools/scheduler/appengine_config.py
@@ -0,0 +1,3 @@
from google.appengine.ext import vendor

vendor.add('lib')
4 changes: 4 additions & 0 deletions tools/scheduler/cron.yaml
@@ -0,0 +1,4 @@
cron:
- description: periodic gc
  url: /tasks/gc
  schedule: every 5 minutes
112 changes: 112 additions & 0 deletions tools/scheduler/main.py
@@ -0,0 +1,112 @@
import collections
import json
import logging
import operator
import re

import flask
from oauth2client.client import GoogleCredentials
from googleapiclient import discovery

from google.appengine.api import urlfetch
from google.appengine.ext import ndb

app = flask.Flask('scheduler')
app.debug = True

# We use exponential moving average to record
# test run times. Higher alpha discounts historic
# observations faster.
alpha = 0.3

PROJECT = 'positive-cocoa-90213'
ZONE = 'us-central1-a'

class Test(ndb.Model):
    total_run_time = ndb.FloatProperty(default=0.) # Not total, but an EWMA
    total_runs = ndb.IntegerProperty(default=0)

class Schedule(ndb.Model):
    shards = ndb.JsonProperty()

@app.route('/record/<path:test_name>/<runtime>', methods=['POST'])
@ndb.transactional
def record(test_name, runtime):
    test = Test.get_by_id(test_name)
    if test is None:
        test = Test(id=test_name)
    test.total_run_time = (test.total_run_time * (1-alpha)) + (float(runtime) * alpha)
    test.total_runs += 1
    test.put()
    return ('', 204)

@app.route('/schedule/<test_run>/<int:shard_count>/<int:shard>', methods=['POST'])
def schedule(test_run, shard_count, shard):
    # read tests from body
    test_names = flask.request.get_json(force=True)['tests']

    # first see if we have a schedule already
    schedule_id = "%s-%d" % (test_run, shard_count)
    schedule = Schedule.get_by_id(schedule_id)
    if schedule is not None:
        return flask.json.jsonify(tests=schedule.shards[str(shard)])

    # if not, do a simple greedy algorithm
    test_times = ndb.get_multi(ndb.Key(Test, test_name) for test_name in test_names)
    def avg(test):
        if test is not None:
            return test.total_run_time
        return 1
    test_times = [(test_name, avg(test)) for test_name, test in zip(test_names, test_times)]
    test_times_dict = dict(test_times)
    test_times.sort(key=operator.itemgetter(1))

    shards = {i: [] for i in xrange(shard_count)}
    while test_times:
        test_name, time = test_times.pop()

        # find the shortest shard and put the test in it
        s, _ = min(((i, sum(test_times_dict[t] for t in shards[i]))
                    for i in xrange(shard_count)), key=operator.itemgetter(1))

        shards[s].append(test_name)

    # atomically insert or retrieve existing schedule
    schedule = Schedule.get_or_insert(schedule_id, shards=shards)
    return flask.json.jsonify(tests=schedule.shards[str(shard)])

NAME_RE = re.compile(r'^host(?P<index>\d+)-(?P<build>\d+)-(?P<shard>\d+)$')

@app.route('/tasks/gc')
def gc():
    # Get list of running VMs, pick build id out of VM name
    credentials = GoogleCredentials.get_application_default()
    compute = discovery.build('compute', 'v1', credentials=credentials)
    instances = compute.instances().list(project=PROJECT, zone=ZONE).execute()
    host_by_build = collections.defaultdict(list)
    for instance in instances['items']:
        matches = NAME_RE.match(instance['name'])
        if matches is None:
            continue
        host_by_build[int(matches.group('build'))].append(instance['name'])
    logging.info("Running VMs by build: %r", host_by_build)

    # Get list of builds, filter down to running builds
    result = urlfetch.fetch('https://circleci.com/api/v1/project/weaveworks/weave',
                            headers={'Accept': 'application/json'})
    assert result.status_code == 200
    builds = json.loads(result.content)
    running = {build['build_num'] for build in builds if build['status'] == 'running'}
    logging.info("Running builds: %r", running)

    # Stop VMs for builds that aren't running
    stopped = []
    for build, names in host_by_build.iteritems():
        if build in running:
            continue
        for name in names:
            stopped.append(name)
            logging.info("Stopping VM %s", name)
            compute.instances().delete(project=PROJECT, zone=ZONE, instance=name).execute()

    return (flask.json.jsonify(running=list(running), stopped=stopped), 200)
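The two POST endpoints defined above can also be exercised directly; a hedged sketch against a local dev server (host, port, and test names are assumptions):

```
# Record a 12.5-second runtime for a test; the handler replies with HTTP 204.
curl -X POST http://localhost:8080/record/200_dns_test.sh/12.5

# Ask for shard 0's slice of a 3-shard run named integration-1234;
# the response is JSON of the form {"tests": [...]}.
curl -X POST -d '{"tests": ["a_test", "b_test", "c_test"]}' \
    http://localhost:8080/schedule/integration-1234/3/0
```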
2 changes: 2 additions & 0 deletions tools/scheduler/requirements.txt
@@ -0,0 +1,2 @@
flask
google-api-python-client
