class PHPFPMCheck(AgentCheck):
    """
    Tracks basic php-fpm metrics via the status module.

    Requires php-fpm pools to have the status option.
    See http://php.net/manual/en/install.fpm.configuration.php#pm.status-path
    for more details.

    Instance options read by this check:
      * status_url -- URL of the FPM status page (queried with ?json)
      * ping_url   -- URL of the FPM ping page (reply must contain 'pong')
      * user / password -- optional basic-auth credentials (both required)
      * tags       -- optional list of custom tags
      * timeout    -- optional HTTP timeout in seconds (DEFAULT_TIMEOUT if unset)
    """

    SERVICE_CHECK_NAME = 'php_fpm.can_ping'

    # Seconds to wait on each HTTP request when the instance sets no `timeout`.
    # Without a timeout a wedged FPM/proxy would block the check indefinitely.
    DEFAULT_TIMEOUT = 20

    # Keys of the FPM status JSON -> metric names, grouped by submission type.
    GAUGES = {
        'listen queue': 'php_fpm.listen_queue.size',
        'idle processes': 'php_fpm.processes.idle',
        'active processes': 'php_fpm.processes.active',
        'total processes': 'php_fpm.processes.total',
    }

    RATES = {
        'max children reached': 'php_fpm.processes.max_reached'
    }

    # NOTE(review): FPM appears to report these as running totals; increment()
    # submits the raw value on every run -- confirm this is the intended
    # metric type before relying on absolute values.
    COUNTERS = {
        'accepted conn': 'php_fpm.requests.accepted',
        'slow requests': 'php_fpm.requests.slow'
    }

    def check(self, instance):
        """
        Run the check for one configured instance.

        Scrapes the status page when `status_url` is set, then emits the
        ping service check when `ping_url` is set. A failure while scraping
        the status page is re-raised only after the ping has been attempted,
        so the service check is still reported.

        Raises:
            Exception: if neither `status_url` nor `ping_url` is configured,
                or whatever `_process_status` raised.
        """
        status_url = instance.get('status_url')
        ping_url = instance.get('ping_url')

        if status_url is None and ping_url is None:
            raise Exception("No status_url or ping_url specified for this instance")

        user = instance.get('user')
        password = instance.get('password')
        auth = (user, password) if user and password else None

        # An empty `tags:` key in YAML yields None rather than a missing key,
        # so `.get('tags', [])` is not enough to guarantee a list.
        tags = instance.get('tags') or []
        timeout = float(instance.get('timeout') or self.DEFAULT_TIMEOUT)

        pool = None
        status_exception = None
        if status_url is not None:
            try:
                pool = self._process_status(status_url, auth, tags, timeout=timeout)
            except Exception as e:
                # Deliberately deferred: the ping service check must still run.
                status_exception = e

        if ping_url is not None:
            self._process_ping(ping_url, auth, tags, pool, timeout=timeout)

        if status_exception is not None:
            raise status_exception

    def _process_status(self, status_url, auth, tags, timeout=None):
        """
        Fetch the FPM status page as JSON and submit its metrics.

        Every metric is tagged with `tags` plus `pool:<name>`, where the name
        comes from the `pool` key of the reply ('default' when absent). Keys
        missing from the reply are logged and skipped.

        Args:
            status_url: URL of the status page.
            auth: (user, password) tuple or None.
            tags: list of custom tags (None is treated as empty).
            timeout: seconds passed to requests.get; None means no timeout.

        Returns:
            The pool name, so the caller can tag the service check with it.

        Raises:
            Any exception raised while fetching or decoding the page, after
            logging it.
        """
        try:
            # TODO: adding the 'full' parameter gets you per-process detailed
            # information, which could be nice to parse and output as metrics
            resp = requests.get(status_url, auth=auth,
                                headers=headers(self.agentConfig),
                                params={'json': True},
                                timeout=timeout)
            resp.raise_for_status()

            data = resp.json()
        except Exception as e:
            self.log.error("Failed to get metrics from {0}.\nError {1}".format(status_url, e))
            raise

        pool_name = data.get('pool', 'default')
        metric_tags = list(tags or []) + ["pool:{0}".format(pool_name)]

        # (label used in the log message, key->metric mapping, submit method)
        submissions = (
            ('Gauge', self.GAUGES, self.gauge),
            ('Rate', self.RATES, self.rate),
            ('Counter', self.COUNTERS, self.increment),
        )
        for label, mapping, submit in submissions:
            # .items() rather than .iteritems(): works on Python 2 and 3
            for key, mname in mapping.items():
                if key not in data:
                    self.log.warn("{0} metric {1} is missing from FPM status".format(label, key))
                    continue
                submit(mname, int(data[key]), tags=metric_tags)

        # return pool, to tag the service check with it if we have one
        return pool_name

    def _process_ping(self, ping_url, auth, tags, pool_name, timeout=None):
        """
        Query the FPM ping page and emit the `php_fpm.can_ping` service check.

        The check is OK when the request succeeds and the reply text contains
        'pong'; any failure is logged and reported as CRITICAL with the error
        as message. This method does not raise on request failures.

        Args:
            ping_url: URL of the ping page.
            auth: (user, password) tuple or None.
            tags: list of custom tags (None is treated as empty).
            pool_name: pool to tag the service check with, or None.
            timeout: seconds passed to requests.get; None means no timeout.
        """
        sc_tags = list(tags or [])
        if pool_name is not None:
            sc_tags.append("pool:{0}".format(pool_name))

        try:
            resp = requests.get(ping_url, auth=auth,
                                headers=headers(self.agentConfig),
                                timeout=timeout)
            resp.raise_for_status()

            if 'pong' not in resp.text:
                raise Exception("Received unexpected reply to ping {0}".format(resp.text))

        except Exception as e:
            self.log.error("Failed to ping FPM pool {0} on URL {1}."
                           "\nError {2}".format(pool_name, ping_url, e))
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.CRITICAL, tags=sc_tags, message=str(e))
        else:
            self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.OK, tags=sc_tags)
File.expand_path(phpfpm_rootdir) + sh %(curl -s -L\ + -o $VOLATILE_DIR/nginx-#{nginx_version}.tar.gz\ + http://nginx.org/download/nginx-#{nginx_version}.tar.gz) + sh %(mkdir -p #{phpfpm_rootdir}) + sh %(mkdir -p $VOLATILE_DIR/nginx) + sh %(tar zxf $VOLATILE_DIR/nginx-#{nginx_version}.tar.gz\ + -C $VOLATILE_DIR/nginx --strip-components=1) + sh %(cd $VOLATILE_DIR/nginx\ + && ./configure --prefix=#{phpfpm_rootdir} --with-http_stub_status_module\ + && make -j $CONCURRENCY\ + && make install) + sh %(curl -s -L\ + -o $VOLATILE_DIR/php-#{php_version}.tar.bz2\ + http://us1.php.net/get/php-#{php_version}.tar.bz2/from/this/mirror) + sh %(mkdir -p $VOLATILE_DIR/php) + sh %(tar jxf $VOLATILE_DIR/php-#{php_version}.tar.bz2\ + -C $VOLATILE_DIR/php --strip-components=1) + sh %(cd $VOLATILE_DIR/php\ + && ./configure --prefix=#{phpfpm_rootdir} --enable-fpm\ + && make -j $CONCURRENCY\ + && make install) + end + end + + task :before_script => ['ci:common:before_script'] do + sh %(cp $TRAVIS_BUILD_DIR/ci/resources/phpfpm/nginx.conf\ + #{phpfpm_rootdir}/conf/nginx.conf) + sh %(cp $TRAVIS_BUILD_DIR/ci/resources/phpfpm/php-fpm.conf\ + #{phpfpm_rootdir}/etc/php-fpm.conf) + sh %(#{phpfpm_rootdir}/sbin/nginx -g "pid #{ENV['VOLATILE_DIR']}/nginx.pid;") + sh %(#{phpfpm_rootdir}/sbin/php-fpm -g #{ENV['VOLATILE_DIR']}/php-fpm.pid) + end + + task :script => ['ci:common:script'] do + this_provides = [ + 'phpfpm' + ] + Rake::Task['ci:common:run_tests'].invoke(this_provides) + end + + task :cleanup => ['ci:common:cleanup'] do + sh %(kill `cat $VOLATILE_DIR/nginx.pid`) + sh %(kill `cat $VOLATILE_DIR/php-fpm.pid`) + end + + task :execute do + exception = nil + begin + %w(before_install install before_script script).each do |t| + Rake::Task["#{flavor.scope.path}:#{t}"].invoke + end + rescue => e + exception = e + puts "Failed task: #{e.class} #{e.message}".red + end + if ENV['SKIP_CLEANUP'] + puts 'Skipping cleanup, disposable environments are great'.yellow + else + puts 'Cleaning up' + 
Rake::Task["#{flavor.scope.path}:cleanup"].invoke + end + fail exception if exception + end + end +end diff --git a/ci/resources/phpfpm/nginx.conf b/ci/resources/phpfpm/nginx.conf new file mode 100644 index 0000000000..49b994bd95 --- /dev/null +++ b/ci/resources/phpfpm/nginx.conf @@ -0,0 +1,21 @@ +worker_processes 1; +events { + worker_connections 1024; +} +http { + include mime.types; + default_type application/octet-stream; + sendfile on; + keepalive_timeout 65; + server { + listen 42424; + server_name localhost; + location ~ /(status|ping|\.*\.php)$ { + root html; + fastcgi_pass 127.0.0.1:9000; + fastcgi_index index.php; + fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name; + include fastcgi_params; + } + } +} diff --git a/ci/resources/phpfpm/php-fpm.conf b/ci/resources/phpfpm/php-fpm.conf new file mode 100644 index 0000000000..f9573d37f3 --- /dev/null +++ b/ci/resources/phpfpm/php-fpm.conf @@ -0,0 +1,11 @@ +[www] +user = nobody +group = nobody +listen = 127.0.0.1:9000 +pm = dynamic +pm.max_children = 5 +pm.start_servers = 2 +pm.min_spare_servers = 1 +pm.max_spare_servers = 3 +pm.status_path = /status +ping.path = /ping diff --git a/conf.d/php_fpm.yaml.example b/conf.d/php_fpm.yaml.example new file mode 100755 index 0000000000..ad2cdcffd6 --- /dev/null +++ b/conf.d/php_fpm.yaml.example @@ -0,0 +1,24 @@ +init_config: + +instances: + - # Get metrics from your FPM pool with this URL + status_url: http://localhost/status + # Get a reliable service check of you FPM pool with that one + ping_url: http://localhost/ping + # These 2 URLs should follow the options from your FPM pool + # See http://php.net/manual/en/install.fpm.configuration.php + # * pm.status_path + # * ping.path + # You should configure your fastcgi passthru (nginx/apache) to + # catch these URLs and redirect them through the FPM pool target + # you want to monitor (FPM `listen` directive in the config, usually + # a UNIX socket or TCP socket. 
+ # + # Use this if you have basic authentication on these pages + # user: bits + # password: D4T4D0G + # + # Array of custom tags + # By default metrics and service check will be tagged by pool and host + # tags: + # - instance:foo diff --git a/tests/common.py b/tests/common.py index 60ed913080..7ce46e20f6 100644 --- a/tests/common.py +++ b/tests/common.py @@ -180,7 +180,10 @@ def coverage_report(self): tested_metrics += 1 else: untested_metrics.append(m) - coverage_metrics=100.0 * tested_metrics / total_metrics + if total_metrics == 0: + coverage_metrics = 100.0 + else: + coverage_metrics = 100.0 * tested_metrics / total_metrics total_sc = len(self.service_checks) tested_sc = 0 @@ -190,7 +193,11 @@ def coverage_report(self): tested_sc += 1 else: untested_sc.append(sc) - coverage_sc=100.0 * tested_sc / total_sc + + if total_sc == 0: + coverage_sc = 100.0 + else: + coverage_sc = 100.0 * tested_sc / total_sc coverage = """Coverage ======================================== @@ -240,8 +247,6 @@ def assertMetric(self, metric_name, value=None, tags=None, count=None, at_least= if count is not None: log.debug(" * should have exactly {0} data points".format(count)) if at_least is not None: - if count is not None: - log.warn("Tolerance param will be ignored b/c count is present") log.debug(" * should have at least {0} data points".format(at_least)) candidates = [] @@ -266,8 +271,6 @@ def assertMetricTagPrefix(self, metric_name, tag_prefix, count=None, at_least=1) if count is not None: log.debug(" * should have exactly {0} data points".format(count)) if at_least is not None: - if count is not None: - log.warn("Tolerance param will be ignored b/c count is present") log.debug(" * should have at least {0} data points".format(at_least)) candidates = [] @@ -290,8 +293,6 @@ def assertMetricTag(self, metric_name, tag, count=None, at_least=1): if count is not None: log.debug(" * should have exactly {0} data points".format(count)) if at_least is not None: - if count is not None: - 
# sample from /status?json
# {
#     "accepted conn": 350,
#     "active processes": 1,
#     "idle processes": 2,
#     "listen queue": 0,
#     "listen queue len": 0,
#     "max active processes": 2,
#     "max children reached": 0,
#     "max listen queue": 0,
#     "pool": "www",
#     "process manager": "dynamic",
#     "slow requests": 0,
#     "start since": 4758,
#     "start time": 1426601833,
#     "total processes": 3
# }


@attr(requires='phpfpm')
class PHPFPMCheckTest(AgentCheckTest):
    """Tests for the php_fpm check, run against the URLs configured below."""

    CHECK_NAME = 'php_fpm'

    def test_bad_status(self):
        """A status URL on port 9001 must make the check raise."""
        config = {
            'instances': [{
                'status_url': 'http://localhost:9001/status',
                'tags': ['expectedbroken'],
            }]
        }

        self.assertRaises(Exception, self.run_check, config)

    def test_bad_ping(self):
        """A ping URL on port 9001 must yield one CRITICAL service check."""
        config = {
            'instances': [{
                'ping_url': 'http://localhost:9001/status',
                'tags': ['expectedbroken'],
            }]
        }

        self.run_check(config)
        self.assertServiceCheck('php_fpm.can_ping',
                                status=AgentCheck.CRITICAL,
                                tags=['expectedbroken'],
                                count=1)

        self.coverage_report()

    def test_status(self):
        """Metrics and the OK service check are reported for port 42424."""
        config = {
            'instances': [{
                'status_url': 'http://localhost:42424/status',
                'ping_url': 'http://localhost:42424/ping',
                'tags': ['cluster:forums'],
            }]
        }
        expected_tags = ['cluster:forums', 'pool:www']
        first_run_metrics = (
            'php_fpm.listen_queue.size',
            'php_fpm.processes.idle',
            'php_fpm.processes.active',
            'php_fpm.processes.total',
            'php_fpm.requests.slow',
            'php_fpm.requests.accepted',
        )

        self.run_check(config)

        # Every first-run metric is emitted once with the custom + pool tags
        for metric_name in first_run_metrics:
            self.assertMetric(metric_name, count=1, tags=expected_tags)

        self.assertMetric('php_fpm.processes.idle', count=1, value=1)
        self.assertMetric('php_fpm.processes.total', count=1, value=2)

        self.assertServiceCheck('php_fpm.can_ping', status=AgentCheck.OK,
                                count=1)
        time.sleep(1)

        # Run check second time to get the rate
        self.run_check(config)
        self.assertMetric('php_fpm.processes.max_reached', count=1)