Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework StatsD usage to use separate client #594

Merged
merged 4 commits into from
Nov 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
*Enhancements*
- Add support for deploying resources that use `generateName` ([#608](https://github.com/Shopify/kubernetes-deploy/pull/608))

*Other*
- Refactor StatsD usage so we can depend on the latest version again. ([#594](https://github.com/Shopify/kubernetes-deploy/pull/594))

## 0.30.0

*Enhancements*
Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ Using another local cluster:
2. Put the name of the context you want to use in a file named `.local-context` in the root of this project. For example: `echo "dind" > .local-context`.
3. Run `bundle exec rake test` (or `dev test` if you work for Shopify).

To make StatsD log what it would have emitted, run a test with `STATSD_DEV=1`.
To make StatsD log what it would have emitted, run a test with `STATSD_ENV=development`.

To see the full-color output of a specific integration test, you can use `PRINT_LOGS=1`. For example: `PRINT_LOGS=1 bundle exec ruby -I test test/integration/krane_deploy_test.rb -n/test_name/`.

Expand Down
2 changes: 1 addition & 1 deletion kubernetes-deploy.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Gem::Specification.new do |spec|
spec.add_dependency("googleauth", "~> 0.8.0")
spec.add_dependency("ejson", "~> 1.0")
spec.add_dependency("colorize", "~> 0.8")
spec.add_dependency("statsd-instrument", '~> 2.3.2')
spec.add_dependency("statsd-instrument", ['>= 2.8', "< 3.1"])
spec.add_dependency("oj", "~> 3.0")
spec.add_dependency("concurrent-ruby", "~> 1.1")
spec.add_dependency("jsonpath", "~> 0.9.6")
Expand Down
1 change: 0 additions & 1 deletion lib/krane/common.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,4 @@

module Krane
MIN_KUBE_VERSION = '1.11.0'
StatsD.build
end
21 changes: 12 additions & 9 deletions lib/krane/deprecated_deploy_task.rb
Original file line number Diff line number Diff line change
Expand Up @@ -200,25 +200,28 @@ def run!(verify_result: true, allow_protected_ns: false, prune: true)
MSG
@logger.summary.add_paragraph(ColorizedString.new(warning).yellow)
end
StatsD.event("Deployment of #{@namespace} succeeded",
StatsD.client.event("Deployment of #{@namespace} succeeded",
"Successfully deployed all #{@namespace} resources to #{@context}",
alert_type: "success", tags: statsd_tags << "status:success")
StatsD.distribution('all_resources.duration', StatsD.duration(start), tags: statsd_tags << "status:success")
alert_type: "success", tags: statsd_tags + %w(status:success))
StatsD.client.distribution('all_resources.duration', StatsD.duration(start),
tags: statsd_tags + %w(status:success))
@logger.print_summary(:success)
rescue DeploymentTimeoutError
@logger.print_summary(:timed_out)
StatsD.event("Deployment of #{@namespace} timed out",
StatsD.client.event("Deployment of #{@namespace} timed out",
"One or more #{@namespace} resources failed to deploy to #{@context} in time",
alert_type: "error", tags: statsd_tags << "status:timeout")
StatsD.distribution('all_resources.duration', StatsD.duration(start), tags: statsd_tags << "status:timeout")
alert_type: "error", tags: statsd_tags + %w(status:timeout))
StatsD.client.distribution('all_resources.duration', StatsD.duration(start),
tags: statsd_tags + %w(status:timeout))
raise
rescue FatalDeploymentError => error
@logger.summary.add_action(error.message) if error.message != error.class.to_s
@logger.print_summary(:failure)
StatsD.event("Deployment of #{@namespace} failed",
StatsD.client.event("Deployment of #{@namespace} failed",
"One or more #{@namespace} resources failed to deploy to #{@context}",
alert_type: "error", tags: statsd_tags << "status:failed")
StatsD.distribution('all_resources.duration', StatsD.duration(start), tags: statsd_tags << "status:failed")
alert_type: "error", tags: statsd_tags + %w(status:failed))
StatsD.client.distribution('all_resources.duration', StatsD.duration(start),
tags: statsd_tags + %w(status:failed))
raise
end

Expand Down
2 changes: 1 addition & 1 deletion lib/krane/kubectl.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def run(*args, log_failure: nil, use_context: true, use_namespace: true, output:
else
logger.debug("Kubectl err: #{output_is_sensitive ? '<suppressed sensitive output>' : err}")
end
StatsD.increment('kubectl.error', 1, tags: { context: context, namespace: namespace, cmd: cmd[1] })
StatsD.client.increment('kubectl.error', 1, tags: { context: context, namespace: namespace, cmd: cmd[1] })

break unless retriable_err?(err, retry_whitelist) && current_attempt < attempts
sleep(retry_delay(current_attempt))
Expand Down
2 changes: 1 addition & 1 deletion lib/krane/kubernetes_resource.rb
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ def pretty_status

def report_status_to_statsd(watch_time)
unless @statsd_report_done
StatsD.distribution('resource.duration', watch_time, tags: statsd_tags)
StatsD.client.distribution('resource.duration', watch_time, tags: statsd_tags)
@statsd_report_done = true
end
end
Expand Down
6 changes: 3 additions & 3 deletions lib/krane/restart_task.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,14 @@ def run!(deployments_names = nil, selector: nil, verify_result: true)
warning = "Result verification is disabled for this task"
@logger.summary.add_paragraph(ColorizedString.new(warning).yellow)
end
StatsD.distribution('restart.duration', StatsD.duration(start), tags: tags('success', deployments))
StatsD.client.distribution('restart.duration', StatsD.duration(start), tags: tags('success', deployments))
@logger.print_summary(:success)
rescue DeploymentTimeoutError
StatsD.distribution('restart.duration', StatsD.duration(start), tags: tags('timeout', deployments))
StatsD.client.distribution('restart.duration', StatsD.duration(start), tags: tags('timeout', deployments))
@logger.print_summary(:timed_out)
raise
rescue FatalDeploymentError => error
StatsD.distribution('restart.duration', StatsD.duration(start), tags: tags('failure', deployments))
StatsD.client.distribution('restart.duration', StatsD.duration(start), tags: tags('failure', deployments))
@logger.summary.add_action(error.message) if error.message != error.class.to_s
@logger.print_summary(:failure)
raise
Expand Down
6 changes: 3 additions & 3 deletions lib/krane/runner_task.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,14 @@ def run!(task_template:, entrypoint:, args:, env_vars: [], verify_result: true)
else
record_status_once(pod)
end
StatsD.distribution('task_runner.duration', StatsD.duration(start), tags: statsd_tags('success'))
StatsD.client.distribution('task_runner.duration', StatsD.duration(start), tags: statsd_tags('success'))
@logger.print_summary(:success)
rescue DeploymentTimeoutError
StatsD.distribution('task_runner.duration', StatsD.duration(start), tags: statsd_tags('timeout'))
StatsD.client.distribution('task_runner.duration', StatsD.duration(start), tags: statsd_tags('timeout'))
@logger.print_summary(:timed_out)
raise
rescue FatalDeploymentError
StatsD.distribution('task_runner.duration', StatsD.duration(start), tags: statsd_tags('failure'))
StatsD.client.distribution('task_runner.duration', StatsD.duration(start), tags: statsd_tags('failure'))
@logger.print_summary(:failure)
raise
end
Expand Down
36 changes: 11 additions & 25 deletions lib/krane/statsd.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,39 +4,25 @@

module Krane
class StatsD
extend ::StatsD

PREFIX = "KubernetesDeploy"

def self.duration(start_time)
(Time.now.utc - start_time).round(1)
end

def self.build
if ENV['STATSD_DEV'].present?
self.backend = ::StatsD::Instrument::Backends::LoggerBackend.new(Logger.new($stderr))
elsif ENV['STATSD_ADDR'].present?
statsd_impl = ENV['STATSD_IMPLEMENTATION'].present? ? ENV['STATSD_IMPLEMENTATION'] : "datadog"
self.backend = ::StatsD::Instrument::Backends::UDPBackend.new(ENV['STATSD_ADDR'], statsd_impl)
else
self.backend = ::StatsD::Instrument::Backends::NullBackend.new
def self.client
@client ||= begin
sink = if ::StatsD::Instrument::Environment.current.env.fetch('STATSD_ENV', nil) == 'development'
::StatsD::Instrument::LogSink.new(Logger.new($stderr))
elsif (addr = ::StatsD::Instrument::Environment.current.env.fetch('STATSD_ADDR', nil))
::StatsD::Instrument::UDPSink.for_addr(addr)
else
::StatsD::Instrument::NullSink.new
end
::StatsD::Instrument::Client.new(prefix: PREFIX, sink: sink, default_sample_rate: 1.0)
end
end

# It is not sufficient to set the prefix field on the Krane::StatsD singleton itself, since its value
# is overridden in the underlying calls to the ::StatsD library, hence the need to pass it in as a custom prefix
# via the metric_options hash. This is done since Krane may be included as a library and should not
# change the global StatsD configuration of the importing application.
def self.increment(key, value = 1, **metric_options)
metric_options[:prefix] = PREFIX
super
end

def self.distribution(key, value = nil, **metric_options, &block)
metric_options[:prefix] = PREFIX
super
end

module MeasureMethods
def measure_method(method_name, metric = nil)
unless method_defined?(method_name) || private_method_defined?(method_name)
Expand Down Expand Up @@ -64,7 +50,7 @@ def measure_method(method_name, metric = nil)
dynamic_tags << "error:#{error}" if dynamic_tags.is_a?(Array)
end

StatsD.distribution(
Krane::StatsD.client.distribution(
metric,
Krane::StatsD.duration(start_time),
tags: dynamic_tags
Expand Down
17 changes: 0 additions & 17 deletions test/helpers/statsd_helper.rb

This file was deleted.

6 changes: 3 additions & 3 deletions test/integration-serial/serial_deploy_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
require 'integration_test_helper'

class SerialDeployTest < Krane::IntegrationTest
include StatsDHelper
include StatsD::Instrument::Assertions
# This cannot be run in parallel because it either stubs a constant or operates in a non-exclusive namespace
def test_deploying_to_protected_namespace_with_override_does_not_prune
assert_deploy_success(deploy_fixtures("hello-cloud", subset: ['configmap-data.yml', 'disruption-budgets.yml'],
Expand Down Expand Up @@ -274,7 +274,7 @@ def test_custom_resources_predeployed
def test_stage_related_metrics_include_custom_tags_from_namespace
hello_cloud = FixtureSetAssertions::HelloCloud.new(@namespace)
kubeclient.patch_namespace(hello_cloud.namespace, metadata: { labels: { foo: 'bar' } })
metrics = capture_statsd_calls do
metrics = capture_statsd_calls(client: Krane::StatsD.client) do
assert_deploy_success deploy_fixtures("hello-cloud", subset: ["configmap-data.yml"], wait: false)
end

Expand All @@ -295,7 +295,7 @@ def test_stage_related_metrics_include_custom_tags_from_namespace
end

def test_all_expected_statsd_metrics_emitted_with_essential_tags
metrics = capture_statsd_calls do
metrics = capture_statsd_calls(client: Krane::StatsD.client) do
result = deploy_fixtures('hello-cloud', subset: ['configmap-data.yml'], wait: false, sha: 'test-sha')
assert_deploy_success(result)
end
Expand Down
8 changes: 4 additions & 4 deletions test/integration-serial/serial_task_run_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

class SerialTaskRunTest < Krane::IntegrationTest
include TaskRunnerTestHelper
include StatsDHelper
include StatsD::Instrument::Assertions

# Mocha is not thread-safe: https://github.com/freerange/mocha#thread-safety
def test_run_without_verify_result_fails_if_pod_was_not_created
Expand Down Expand Up @@ -35,7 +35,7 @@ def test_failure_statsd_metric_emitted
bad_ns = "missing"
task_runner = build_task_runner(ns: bad_ns)

metrics = capture_statsd_calls do
metrics = capture_statsd_calls(client: Krane::StatsD.client) do
result = task_runner.run(run_params)
assert_task_run_failure(result)
end
Expand All @@ -52,7 +52,7 @@ def test_success_statsd_metric_emitted
deploy_task_template
task_runner = build_task_runner

metrics = capture_statsd_calls do
metrics = capture_statsd_calls(client: Krane::StatsD.client) do
result = task_runner.run(run_params.merge(verify_result: false))
assert_task_run_success(result)
end
Expand All @@ -69,7 +69,7 @@ def test_timedout_statsd_metric_emitted
deploy_task_template
task_runner = build_task_runner(max_watch_seconds: 0)

metrics = capture_statsd_calls do
metrics = capture_statsd_calls(client: Krane::StatsD.client) do
result = task_runner.run(run_params.merge(args: ["sleep 5"]))
assert_task_run_failure(result, :timed_out)
end
Expand Down
1 change: 1 addition & 0 deletions test/test_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
require 'ruby-prof'
require 'ruby-prof-flamegraph'
end
ENV["STATSD_USE_NEW_CLIENT"] ||= "1" # support forwards compatibility with v3.0

$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
require 'krane'
Expand Down
4 changes: 2 additions & 2 deletions test/unit/krane/kubectl_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
require 'test_helper'

class KubectlTest < Krane::TestCase
include StatsDHelper
include StatsD::Instrument::Assertions
include EnvTestHelper

def setup
Expand Down Expand Up @@ -109,7 +109,7 @@ def test_run_with_multiple_attempts_retries_and_emits_failure_metrics
kubectl = build_kubectl
kubectl.expects(:retry_delay).returns(0).times(4)

metrics = capture_statsd_calls do
metrics = capture_statsd_calls(client: Krane::StatsD.client) do
_out, _err, st = kubectl.run("get", "pods", attempts: 5)
refute_predicate st, :success?
end
Expand Down
48 changes: 8 additions & 40 deletions test/unit/krane/statsd_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
require 'test_helper'

class StatsDTest < Krane::TestCase
include StatsDHelper
include StatsD::Instrument::Assertions

class TestMeasureClass
extend(Krane::StatsD::MeasureMethods)

Expand Down Expand Up @@ -31,39 +32,6 @@ def thing_to_measure; end
measure_method :thing_to_measure
end

def test_build_when_statsd_addr_env_present_but_statsd_implementation_is_not
original_addr = ENV['STATSD_ADDR']
ENV['STATSD_ADDR'] = '127.0.0.1'
original_impl = ENV['STATSD_IMPLEMENTATION']
ENV['STATSD_IMPLEMENTATION'] = nil
original_dev = ENV['STATSD_DEV']
ENV['STATSD_DEV'] = nil

Krane::StatsD.build

assert_equal(:datadog, Krane::StatsD.backend.implementation)
ensure
ENV['STATSD_ADDR'] = original_addr
ENV['STATSD_IMPLEMENTATION'] = original_impl
ENV['STATSD_DEV'] = original_dev
Krane::StatsD.build
end

def test_kubernetes_statsd_does_not_override_global_config
old_prefix = ::StatsD.prefix
old_sample_rate = ::StatsD.default_sample_rate

::StatsD.prefix = "test"
::StatsD.default_sample_rate = 2.0
Krane::StatsD.build
refute_equal(Krane::StatsD.prefix, ::StatsD.prefix)
refute_equal(Krane::StatsD.default_sample_rate, ::StatsD.default_sample_rate)
refute_equal(Krane::StatsD.backend, ::StatsD.backend)
ensure
::StatsD.prefix = old_prefix
::StatsD.default_sample_rate = old_sample_rate
end

def test_measuring_non_existent_method_raises
assert_raises_message(NotImplementedError, "Cannot instrument undefined method bogus_method") do
TestMeasureClass.measure_method(:bogus_method)
Expand All @@ -75,7 +43,7 @@ def test_measure_method_does_not_change_the_return_value
end

def test_measure_method_uses_expected_name_and_tags
metrics = capture_statsd_calls do
metrics = capture_statsd_calls(client: Krane::StatsD.client) do
TestMeasureClass.new.thing_to_measure
end
assert_predicate(metrics, :one?, "Expected 1 metric, got #{metrics.length}")
Expand All @@ -84,7 +52,7 @@ def test_measure_method_uses_expected_name_and_tags
end

def test_measure_method_with_custom_metric_name
metrics = capture_statsd_calls do
metrics = capture_statsd_calls(client: Krane::StatsD.client) do
TestMeasureClass.new.measure_with_custom_metric
end
assert_predicate(metrics, :one?, "Expected 1 metric, got #{metrics.length}")
Expand All @@ -93,16 +61,16 @@ def test_measure_method_with_custom_metric_name
end

def test_measure_method_with_statsd_tags_undefined
metrics = capture_statsd_calls do
metrics = capture_statsd_calls(client: Krane::StatsD.client) do
TestMeasureNoTags.new.thing_to_measure
end
assert_predicate(metrics, :one?, "Expected 1 metric, got #{metrics.length}")
assert_equal("KubernetesDeploy.thing_to_measure.duration", metrics.first.name)
assert_empty(metrics.first.tags)
assert_nil(metrics.first.tags)
end

def test_measure_method_that_raises_with_hash_tags
metrics = capture_statsd_calls do
metrics = capture_statsd_calls(client: Krane::StatsD.client) do
tester = TestMeasureClass.new
tester.expects(:statsd_tags).returns(test: true)
assert_raises(ArgumentError) do
Expand All @@ -115,7 +83,7 @@ def test_measure_method_that_raises_with_hash_tags
end

def test_measure_method_that_raises_with_array_tags
metrics = capture_statsd_calls do
metrics = capture_statsd_calls(client: Krane::StatsD.client) do
tester = TestMeasureClass.new
tester.expects(:statsd_tags).returns(["test:true"])
assert_raises(ArgumentError) do
Expand Down