Skip to content

Commit

Permalink
[riak] add integration test ✅
Browse files Browse the repository at this point in the history
* build Riak from source (two riaks to be precise, to be able to build a
  cluster)
* add custom tags to Riak check + usual pep8
* add a new test for Riak
* do not run test on Travis if not able to use the cache
And also: delete rotated log before caching
  • Loading branch information
degemer committed Apr 7, 2015
1 parent 39cb19e commit a496053
Show file tree
Hide file tree
Showing 7 changed files with 238 additions and 32 deletions.
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ env:
- TRAVIS_FLAVOR=redis FLAVOR_VERSION=2.4.18
- TRAVIS_FLAVOR=redis FLAVOR_VERSION=2.6.17
- TRAVIS_FLAVOR=redis FLAVOR_VERSION=2.8.19
# Compilation takes too much time on Travis
# - TRAVIS_FLAVOR=riak
- TRAVIS_FLAVOR=snmpd
- TRAVIS_FLAVOR=ssh
- TRAVIS_FLAVOR=supervisord
Expand Down
1 change: 1 addition & 0 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ require './ci/phpfpm'
require './ci/postgres'
require './ci/rabbitmq'
require './ci/redis'
require './ci/riak'
require './ci/snmpd'
require './ci/ssh'
require './ci/supervisord'
Expand Down
63 changes: 32 additions & 31 deletions checks.d/riak.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# stdlib
import time
from hashlib import md5
import socket

# project
Expand All @@ -10,44 +8,45 @@
import simplejson as json
from httplib2 import Http, HttpLib2Error


class Riak(AgentCheck):
SERVICE_CHECK_NAME = 'riak.can_connect'

keys = [
"vnode_gets",
"vnode_puts",
"vnode_index_reads",
"vnode_index_writes",
"vnode_index_deletes",
"node_gets",
"node_puts",
"pbc_active",
"pbc_connects",
"memory_total",
"memory_processes",
"memory_processes_used",
"memory_atom",
"memory_atom_used",
"memory_binary",
"memory_code",
"memory_ets",
"read_repairs",
"node_put_fsm_rejected_60s",
"node_put_fsm_active_60s",
"node_put_fsm_in_rate",
"node_put_fsm_out_rate",
"node_get_fsm_rejected_60s",
"memory_processes",
"memory_processes_used",
"memory_total",
"node_get_fsm_active_60s",
"node_get_fsm_in_rate",
"node_get_fsm_out_rate"
"node_get_fsm_rejected_60s",
"node_gets",
"node_put_fsm_active_60s",
"node_put_fsm_in_rate",
"node_put_fsm_out_rate",
"node_put_fsm_rejected_60s",
"node_puts",
"pbc_active",
"pbc_connects",
"read_repairs",
"vnode_gets",
"vnode_index_deletes",
"vnode_index_reads",
"vnode_index_writes",
"vnode_puts",
]

stat_keys = [
"node_get_fsm_siblings",
"node_get_fsm_objsize",
"node_get_fsm_siblings",
"node_get_fsm_time",
"node_put_fsm_time"
]
]

def __init__(self, name, init_config, agentConfig, instances=None):
AgentCheck.__init__(self, name, init_config, agentConfig, instances)
Expand All @@ -57,30 +56,32 @@ def __init__(self, name, init_config, agentConfig, instances=None):
self.prev_coord_redirs_total = -1

def check(self, instance):
url = instance['url']
url = instance['url']
default_timeout = self.init_config.get('default_timeout', 5)
timeout = float(instance.get('timeout', default_timeout))
service_check_tags = ['url:%s' % url]
timeout = float(instance.get('timeout', default_timeout))
tags = instance.get('tags', [])
service_check_tags = tags + ['url:%s' % url]

try:
h = Http(timeout=timeout)
resp, content = h.request(url, "GET")
except (socket.timeout, socket.error, HttpLib2Error) as e:
self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL,
message="Unable to fetch Riak stats: %s" % str(e),
tags=service_check_tags)
message="Unable to fetch Riak stats: %s" % str(e),
tags=service_check_tags)
raise

if resp.status != 200:
self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL,
tags=service_check_tags,
message="Unexpected status of %s when fetching Riak stats, " \
"response: %s" % (resp.status, content))
tags=service_check_tags,
message="Unexpected status of %s when fetching Riak stats, "
"response: %s" % (resp.status, content))

stats = json.loads(content)
self.service_check(
self.SERVICE_CHECK_NAME, AgentCheck.OK, tags=service_check_tags)

[self.gauge("riak." + k, stats[k]) for k in self.keys if k in stats]
[self.gauge("riak." + k, stats[k], tags=tags) for k in self.keys if k in stats]

coord_redirs_total = stats["coord_redirs_total"]
if self.prev_coord_redirs_total > -1:
Expand Down
2 changes: 1 addition & 1 deletion ci/common.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def section(name)

task :before_cache do |t|
section('BEFORE_CACHE')
sh %(find $INTEGRATIONS_DIR/ -type f -name '*.log' -delete)
sh %(find $INTEGRATIONS_DIR/ -type f -name '*.log*' -delete)
t.reenable
end

Expand Down
98 changes: 98 additions & 0 deletions ci/riak.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
require './ci/common'

# Riak release to build and test against.
#
# Reads FLAVOR_VERSION (the variable the Travis build matrix uses to pin a
# flavor's version) and falls back to 2.0.5 when it is not set. It must not
# read another flavor's variable, otherwise an unrelated version number would
# select the Riak release.
def riak_version
  ENV['FLAVOR_VERSION'] || '2.0.5'
end

# Path of the directory that holds the built Riak dev nodes for the selected
# version: "<INTEGRATIONS_DIR>/riak_<version>".
def riak_rootdir
  format('%s/riak_%s', ENV['INTEGRATIONS_DIR'], riak_version)
end

# CI tasks for the Riak flavor: build Riak (and the Erlang it needs) from
# source, start a two-node dev cluster, run the tests tagged 'riak', then
# stop the nodes and wipe their data.
namespace :ci do
  namespace :riak do |flavor|
    task :before_install => ['ci:common:before_install']

    # Builds Erlang R16B02 with kerl, then builds Riak with two dev nodes and
    # moves them to riak_rootdir. Skipped entirely when riak_rootdir already
    # exists (e.g. restored from the cache).
    task :install => ['ci:common:install'] do
      unless Dir.exist? File.expand_path(riak_rootdir)
        # Download directory is "<major>.<minor>"; take the first two
        # dot-separated components so versions like "2.10.1" map to "2.10"
        # rather than a truncated "2.1".
        series = riak_version.split('.').first(2).join('.')

        sh %(curl -o $VOLATILE_DIR/kerl https://raw.githubusercontent.com/spawngrid/kerl/master/kerl)
        sh %(chmod a+x $VOLATILE_DIR/kerl)
        sh %($VOLATILE_DIR/kerl build git git://github.com/basho/otp.git OTP_R16B02 R16B02)
        sh %($VOLATILE_DIR/kerl install R16B02 $VOLATILE_DIR/erlang/R16B02)
        sh %(curl -o $VOLATILE_DIR/riak.tar.gz http://s3.amazonaws.com/downloads.basho.com/riak/#{series}/#{riak_version}/riak-#{riak_version}.tar.gz)
        sh %(mkdir -p $VOLATILE_DIR/riak)
        sh %(tar zxvf $VOLATILE_DIR/riak.tar.gz -C $VOLATILE_DIR/riak --strip-components=1)
        # A backslash-newline inside %() is removed from the string, so each
        # continued line ends with an explicit space to keep the shell words
        # separated regardless of how the next line is indented.
        sh %(cd $VOLATILE_DIR/riak \
             && PATH=$PATH:$VOLATILE_DIR/erlang/R16B02/bin make all \
             && PATH=$PATH:$VOLATILE_DIR/erlang/R16B02/bin make devrel DEVNODES=2)
        sh %(mv $VOLATILE_DIR/riak/dev #{riak_rootdir})
      end
    end

    # Starts both dev nodes, joins dev2 to dev1, and writes/reads a key ten
    # times through dev1's HTTP port.
    task :before_script => ['ci:common:before_script'] do
      %w(dev1 dev2).each do |dev|
        sh %(#{riak_rootdir}/#{dev}/bin/riak start)
      end
      # When cached, dev2 is already a member of the cluster
      sh %(#{riak_rootdir}/dev2/bin/riak-admin cluster join dev1@127.0.0.1 || true)
      sh %(#{riak_rootdir}/dev2/bin/riak-admin cluster plan)
      sh %(#{riak_rootdir}/dev2/bin/riak-admin cluster commit)
      10.times do
        # Explicit spaces before each continuation keep the URL and the
        # -H / -d options as separate shell words.
        sh %(curl -s -XPUT http://localhost:10018/buckets/welcome/keys/german \
             -H 'Content-Type: text/plain' \
             -d 'herzlich willkommen')
        sh %(curl -s http://localhost:10018/buckets/welcome/keys/german)
      end
    end

    # Runs the tests that require the 'riak' flavor.
    task :script => ['ci:common:script'] do
      this_provides = [
        'riak'
      ]
      Rake::Task['ci:common:run_tests'].invoke(this_provides)
    end

    task :before_cache => ['ci:common:before_cache'] do
      Rake::Task['ci:riak:cleanup'].invoke
    end

    task :cache => ['ci:common:cache']

    # Stops each dev node and removes its data directory.
    task :cleanup => ['ci:common:cleanup'] do
      %w(dev1 dev2).each do |dev|
        sh %(#{riak_rootdir}/#{dev}/bin/riak stop)
        sh %(rm -rf #{riak_rootdir}/#{dev}/data)
      end
    end

    # Entry point: runs the whole flavor life cycle. A failure in any step is
    # remembered and re-raised only after cleanup (and caching on Travis) have
    # had a chance to run.
    task :execute do
      exception = nil
      # Compilation takes too long on Travis, so it's using the cache
      # External contributors don't have access to the cache
      # So they can't run these tests
      if ENV['TRAVIS'] && ENV['AWS_SECRET_ACCESS_KEY'].nil?
        puts "Riak tests won't run, compilation takes too long on Travis"
      else
        begin
          %w(before_install install before_script script).each do |t|
            Rake::Task["#{flavor.scope.path}:#{t}"].invoke
          end
        rescue => e
          exception = e
          puts "Failed task: #{e.class} #{e.message}".red
        end
        if ENV['SKIP_CLEANUP']
          puts 'Skipping cleanup, disposable environments are great'.yellow
        else
          puts 'Cleaning up'
          Rake::Task["#{flavor.scope.path}:cleanup"].invoke
        end
        if ENV['TRAVIS']
          %w(before_cache cache).each do |t|
            Rake::Task["#{flavor.scope.path}:#{t}"].invoke
          end
        end
        fail exception if exception
      end
    end
  end
end
3 changes: 3 additions & 0 deletions conf.d/riak.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@ init_config:

instances:
- url: http://127.0.0.1:8098/stats
# tags:
# - optional_tag1
# - optional_tag2
101 changes: 101 additions & 0 deletions tests/test_riak.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from nose.plugins.attrib import attr
import socket
from tests.common import AgentCheckTest


# Integration test for the `riak` check. It runs the check against a stats
# endpoint on localhost:10018 and verifies the emitted gauges, their tags and
# the `riak.can_connect` service check.
@attr(requires='riak')
class RiakTestCase(AgentCheckTest):

    CHECK_NAME = 'riak'

    # Plain gauges expected once per check run.
    CHECK_GAUGES = [
        'riak.memory_atom',
        'riak.memory_atom_used',
        'riak.memory_binary',
        'riak.memory_code',
        'riak.memory_ets',
        'riak.memory_processes',
        'riak.memory_processes_used',
        'riak.memory_total',
        'riak.node_get_fsm_active_60s',
        'riak.node_get_fsm_in_rate',
        'riak.node_gets',
        'riak.node_put_fsm_active_60s',
        'riak.node_put_fsm_in_rate',
        'riak.node_put_fsm_out_rate',
        'riak.node_put_fsm_rejected_60s',
        'riak.node_puts',
        'riak.pbc_active',
        'riak.pbc_connects',
        'riak.read_repairs',
        'riak.vnode_gets',
        'riak.vnode_index_deletes',
        'riak.vnode_index_reads',
        'riak.vnode_index_writes',
        'riak.vnode_puts',
    ]

    # Per-statistic gauges (100/95/99/mean/median suffixes) expected once per
    # check run.
    CHECK_GAUGES_STATS = [
        'riak.node_get_fsm_objsize_100',
        'riak.node_get_fsm_objsize_95',
        'riak.node_get_fsm_objsize_99',
        'riak.node_get_fsm_objsize_mean',
        'riak.node_get_fsm_objsize_median',
        'riak.node_get_fsm_siblings_100',
        'riak.node_get_fsm_siblings_95',
        'riak.node_get_fsm_siblings_99',
        'riak.node_get_fsm_siblings_mean',
        'riak.node_get_fsm_siblings_median',
        'riak.node_get_fsm_time_100',
        'riak.node_get_fsm_time_95',
        'riak.node_get_fsm_time_99',
        'riak.node_get_fsm_time_mean',
        'riak.node_get_fsm_time_median',
        'riak.node_put_fsm_time_100',
        'riak.node_put_fsm_time_95',
        'riak.node_put_fsm_time_99',
        'riak.node_put_fsm_time_mean',
        'riak.node_put_fsm_time_median',
    ]

    # FIXME
    # These gauges are not reported while their value is null, and there is
    # no practical way to fake them here.
    # They are also missing from the docs:
    # http://docs.basho.com/riak/latest/ops/running/stats-and-monitoring/
    CHECK_NOT_TESTED = [
        'riak.node_get_fsm_out_rate',
        'riak.node_get_fsm_rejected_60s',
    ]

    SERVICE_CHECK_NAME = 'riak.can_connect'

    # Happy path: every listed gauge is emitted exactly once carrying the
    # instance tag, and the service check is OK with the url tag appended.
    def test_riak(self):
        stats_url = "http://localhost:10018/stats"
        instance = {"url": stats_url, "tags": ["my_tag"]}
        self.run_check({"instances": [instance]})

        expected_tags = ['my_tag']
        expected_sc_tags = expected_tags + ['url:' + stats_url]

        for metric_name in self.CHECK_GAUGES + self.CHECK_GAUGES_STATS:
            self.assertMetric(metric_name, count=1, tags=expected_tags)

        self.assertServiceCheckOK(
            self.SERVICE_CHECK_NAME, tags=expected_sc_tags, count=1)
        self.coverage_report()

    # Failure path: pointing the check at a URL where the connection fails
    # must raise socket.error and report a CRITICAL service check.
    def test_bad_config(self):
        bad_config = {"instances": [{"url": "http://localhost:5985"}]}
        with self.assertRaises(socket.error):
            self.run_check(bad_config)

        expected_sc_tags = ['url:http://localhost:5985']
        self.assertServiceCheckCritical(
            self.SERVICE_CHECK_NAME, tags=expected_sc_tags, count=1)
        self.coverage_report()

0 comments on commit a496053

Please sign in to comment.