diff --git a/tools/README.md b/tools/README.md index 85fdfeb39b..32ddb57b1e 100644 --- a/tools/README.md +++ b/tools/README.md @@ -17,6 +17,8 @@ Included in this repo are tools shared by weave.git and scope.git. They include suffixed with the number of hosts it requires, and the hosts available are contained in the environment variable HOSTS, the tool will run tests in parallel, on different hosts. +- ```scheduler```: an appengine application that can be used to distribute + tests across different shards in CircleCI. ## Using build-tools.git diff --git a/tools/cover/gather_coverage.sh b/tools/cover/gather_coverage.sh new file mode 100755 index 0000000000..9026745a1f --- /dev/null +++ b/tools/cover/gather_coverage.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# This script copies all the coverage reports from various circle shards, +# merges them and produces a complete report. + +set -ex +DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +DESTINATION=$1 +FROMDIR=$2 +mkdir -p $DESTINATION + +if [ -n "$CIRCLECI" ]; then + for i in $(seq 1 $(($CIRCLE_NODE_TOTAL - 1))); do + scp node$i:$FROMDIR/* $DESTINATION || true + done +fi + +go get github.com/weaveworks/build-tools/cover +cover $DESTINATION/* >profile.cov +go tool cover -html=profile.cov -o coverage.html +go tool cover -func=profile.cov -o coverage.txt +tar czf coverage.tar.gz $DESTINATION diff --git a/tools/rebuild-image b/tools/rebuild-image index 0eb3cff3c8..df3a371c42 100755 --- a/tools/rebuild-image +++ b/tools/rebuild-image @@ -17,12 +17,12 @@ rebuild() { mkdir -p $CACHEDIR rm $CACHEDIR/$SAVEDNAME* || true docker build -t $IMAGENAME $IMAGEDIR - docker save $IMAGENAME:latest > $CACHEDIR/$SAVEDNAME-$CIRCLE_SHA1 + docker save $IMAGENAME:latest | gzip - > $CACHEDIR/$SAVEDNAME-$CIRCLE_SHA1.gz } # Get the revision the cached image was build at cached_image_rev() { - find $CACHEDIR -name "$SAVEDNAME-*" -type f | sed 's/[^\-]*\-//' + find $CACHEDIR -name "$SAVEDNAME-*" -type f | sed -n 's/^[^\-]*\-\([a-z0-9]*\).gz$/\1/p' }
# Have there been any revision beween $1 and $2 @@ -60,4 +60,4 @@ fi # we didn't rebuild; import cached version echo ">>> No changes found, importing cached image" -docker load -i $CACHEDIR/$SAVEDNAME-$cached_revision +zcat $CACHEDIR/$SAVEDNAME-$cached_revision.gz | docker load diff --git a/tools/runner/runner.go b/tools/runner/runner.go index bfac9c58b9..f09ffcd6da 100644 --- a/tools/runner/runner.go +++ b/tools/runner/runner.go @@ -138,11 +138,16 @@ func updateScheduler(test string, duration float64) { func getSchedule(tests []string) ([]string, error) { var ( - testRun = "integration-" + os.Getenv("CIRCLE_BUILD_NUM") + prefix = os.Getenv("SCHEDULER_PREFIX") + buildNum = os.Getenv("CIRCLE_BUILD_NUM") shardCount = os.Getenv("CIRCLE_NODE_TOTAL") shardID = os.Getenv("CIRCLE_NODE_INDEX") requestBody = &bytes.Buffer{} + testRun = "integration-" + buildNum ) + if prefix != "" { + testRun = prefix + "-" + buildNum + } if err := json.NewEncoder(requestBody).Encode(schedule{tests}); err != nil { return []string{}, err } diff --git a/tools/sched b/tools/sched new file mode 100755 index 0000000000..e94e8af8f1 --- /dev/null +++ b/tools/sched @@ -0,0 +1,38 @@ +#!/usr/bin/python +import sys, string, json, urllib +import requests + +BASE_URL="http://positive-cocoa-90213.appspot.com" + +def test_time(test_name, runtime): + r = requests.post(BASE_URL + "/record/%s/%f" % (urllib.quote(test_name, safe=""), runtime)) + print r.text + assert r.status_code == 204 + +def test_sched(test_run, shard_count, shard_id): + tests = json.dumps({'tests': string.split(sys.stdin.read())}) + r = requests.post(BASE_URL + "/schedule/%s/%d/%d" % (test_run, shard_count, shard_id), data=tests) + assert r.status_code == 200 + result = r.json() + for test in sorted(result['tests']): + print test + +def usage(): + print "%s " % sys.argv[0] + print " time " + print " sched " + +def main(): + if len(sys.argv) < 4: + usage() + sys.exit(1) + + if sys.argv[1] == "time": + test_time(sys.argv[2], 
float(sys.argv[3])) + elif sys.argv[1] == "sched": + test_sched(sys.argv[2], int(sys.argv[3]), int(sys.argv[4])) + else: + usage() + +if __name__ == '__main__': + main() diff --git a/tools/scheduler/.gitignore b/tools/scheduler/.gitignore new file mode 100644 index 0000000000..a65b41774a --- /dev/null +++ b/tools/scheduler/.gitignore @@ -0,0 +1 @@ +lib diff --git a/tools/scheduler/README.md b/tools/scheduler/README.md new file mode 100644 index 0000000000..8489d78706 --- /dev/null +++ b/tools/scheduler/README.md @@ -0,0 +1,6 @@ +To upload a newer version: + +``` +pip install -r requirements.txt -t lib +appcfg.py update . +``` diff --git a/tools/scheduler/app.yaml b/tools/scheduler/app.yaml new file mode 100644 index 0000000000..8bc59f0049 --- /dev/null +++ b/tools/scheduler/app.yaml @@ -0,0 +1,13 @@ +application: positive-cocoa-90213 +version: 1 +runtime: python27 +api_version: 1 +threadsafe: true + +handlers: +- url: .* + script: main.app + +libraries: +- name: webapp2 + version: latest diff --git a/tools/scheduler/appengine_config.py b/tools/scheduler/appengine_config.py new file mode 100644 index 0000000000..f4489ff968 --- /dev/null +++ b/tools/scheduler/appengine_config.py @@ -0,0 +1,3 @@ +from google.appengine.ext import vendor + +vendor.add('lib') diff --git a/tools/scheduler/cron.yaml b/tools/scheduler/cron.yaml new file mode 100644 index 0000000000..652aed802a --- /dev/null +++ b/tools/scheduler/cron.yaml @@ -0,0 +1,4 @@ +cron: +- description: periodic gc + url: /tasks/gc + schedule: every 5 minutes diff --git a/tools/scheduler/main.py b/tools/scheduler/main.py new file mode 100644 index 0000000000..ed0c78e315 --- /dev/null +++ b/tools/scheduler/main.py @@ -0,0 +1,112 @@ +import collections +import json +import logging +import operator +import re + +import flask +from oauth2client.client import GoogleCredentials +from googleapiclient import discovery + +from google.appengine.api import urlfetch +from google.appengine.ext import ndb + +app = 
flask.Flask('scheduler') +app.debug = True + +# We use exponential moving average to record +# test run times. Higher alpha discounts historic +# observations faster. +alpha = 0.3 + +PROJECT = 'positive-cocoa-90213' +ZONE = 'us-central1-a' + +class Test(ndb.Model): + total_run_time = ndb.FloatProperty(default=0.) # Not total, but an EWMA + total_runs = ndb.IntegerProperty(default=0) + +class Schedule(ndb.Model): + shards = ndb.JsonProperty() + +@app.route('/record//', methods=['POST']) +@ndb.transactional +def record(test_name, runtime): + test = Test.get_by_id(test_name) + if test is None: + test = Test(id=test_name) + test.total_run_time = (test.total_run_time * (1-alpha)) + (float(runtime) * alpha) + test.total_runs += 1 + test.put() + return ('', 204) + +@app.route('/schedule///', methods=['POST']) +def schedule(test_run, shard_count, shard): + # read tests from body + test_names = flask.request.get_json(force=True)['tests'] + + # first see if we have a schedule already + schedule_id = "%s-%d" % (test_run, shard_count) + schedule = Schedule.get_by_id(schedule_id) + if schedule is not None: + return flask.json.jsonify(tests=schedule.shards[str(shard)]) + + # if not, do simple greedy algorithm + test_times = ndb.get_multi(ndb.Key(Test, test_name) for test_name in test_names) + def avg(test): + if test is not None: + return test.total_run_time + return 1 + test_times = [(test_name, avg(test)) for test_name, test in zip(test_names, test_times)] + test_times_dict = dict(test_times) + test_times.sort(key=operator.itemgetter(1)) + + shards = {i: [] for i in xrange(shard_count)} + while test_times: + test_name, time = test_times.pop() + + # find shortest shard and put it in that + s, _ = min(((i, sum(test_times_dict[t] for t in shards[i])) + for i in xrange(shard_count)), key=operator.itemgetter(1)) + + shards[s].append(test_name) + + # atomically insert or retrieve existing schedule + schedule = Schedule.get_or_insert(schedule_id, shards=shards) + return 
flask.json.jsonify(tests=schedule.shards[str(shard)]) + +NAME_RE = re.compile(r'^host(?P\d+)-(?P\d+)-(?P\d+)$') + +@app.route('/tasks/gc') +def gc(): + # Get list of running VMs, pick build id out of VM name + credentials = GoogleCredentials.get_application_default() + compute = discovery.build('compute', 'v1', credentials=credentials) + instances = compute.instances().list(project=PROJECT, zone=ZONE).execute() + host_by_build = collections.defaultdict(list) + for instance in instances['items']: + matches = NAME_RE.match(instance['name']) + if matches is None: + continue + host_by_build[int(matches.group('build'))].append(instance['name']) + logging.info("Running VMs by build: %r", host_by_build) + + # Get list of builds, filter down to running builds + result = urlfetch.fetch('https://circleci.com/api/v1/project/weaveworks/weave', + headers={'Accept': 'application/json'}) + assert result.status_code == 200 + builds = json.loads(result.content) + running = {build['build_num'] for build in builds if build['status'] == 'running'} + logging.info("Runnings builds: %r", running) + + # Stop VMs for builds that aren't running + stopped = [] + for build, names in host_by_build.iteritems(): + if build in running: + continue + for name in names: + stopped.append(name) + logging.info("Stopping VM %s", name) + compute.instances().delete(project=PROJECT, zone=ZONE, instance=name).execute() + + return (flask.json.jsonify(running=list(running), stopped=stopped), 200) diff --git a/tools/scheduler/requirements.txt b/tools/scheduler/requirements.txt new file mode 100644 index 0000000000..d4d47e6eb6 --- /dev/null +++ b/tools/scheduler/requirements.txt @@ -0,0 +1,2 @@ +flask +google-api-python-client