Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

e2e: Fixed WaitSumMetrics to fail on non existing metric #2256

Merged
merged 3 commits into from
Mar 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ jobs:
export CORTEX_IMAGE="${CORTEX_IMAGE_PREFIX}cortex:${CIRCLE_TAG:-$(./tools/image-tag)}"
export CORTEX_CHECKOUT_DIR="/home/circleci/.go_workspace/src/github.com/cortexproject/cortex"
echo "Running integration tests with image: $CORTEX_IMAGE"
go test -tags=integration -timeout 300s -v -count=1 ./integration/...
go test -tags=requires_docker -timeout 300s -v -count=1 ./integration/...

build:
<<: *defaults
Expand Down
5 changes: 3 additions & 2 deletions integration/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@

## Running

Integration tests have `integration` tag (`// +build integration` line followed by empty line on top of Go files), to avoid running them unintentionally, e.g. by `go test ./...` in main Cortex package.
Integration tests have `requires_docker` tag (`// +build requires_docker` line followed by empty line on top of Go files),
to avoid running them unintentionally as they require docker, e.g. by `go test ./...` in main Cortex package.

To run integration tests, one needs to run `go test -tags=integration ./integration/...` (or just `go test -tags=integration ./...` to run unit tests as well).
To run integration tests, one needs to run `go test -tags=requires_docker ./integration/...` (or just `go test -tags=requires_docker ./...` to run unit tests as well).

## Owners

Expand Down
2 changes: 1 addition & 1 deletion integration/alertmanager_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
// +build requires_docker

package main

Expand Down
2 changes: 1 addition & 1 deletion integration/api_config_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
// +build requires_docker

package main

Expand Down
23 changes: 13 additions & 10 deletions integration/backward_compatibility_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
// +build requires_docker

package main

Expand Down Expand Up @@ -36,20 +36,23 @@ func TestBackwardCompatibilityWithChunksStorage(t *testing.T) {
"-config-yaml": ChunksStorageFlags["-schema-config-file"],
})

// Start Cortex components (ingester running on previous version).
require.NoError(t, writeFileToSharedDir(s, cortexSchemaConfigFile, []byte(cortexSchemaConfigYaml)))
tableManager := e2ecortex.NewTableManager("table-manager", flagsForOldImage, previousVersionImage)
// Old table-manager doesn't expose a readiness probe, so we just check if the / returns 404
tableManager.SetReadinessProbe(e2e.NewReadinessProbe(tableManager.HTTPPort(), "/", 404))

// Start Cortex table-manager (running on current version since the backward compatibility
// test is about testing a rolling update of other services).
tableManager := e2ecortex.NewTableManager("table-manager", ChunksStorageFlags, "")
require.NoError(t, s.StartAndWaitReady(tableManager))

// Wait until the first table-manager sync has completed, so that we're
// sure the tables have been created.
require.NoError(t, tableManager.WaitSumMetrics(e2e.Greater(0), "cortex_table_manager_sync_success_timestamp_seconds"))

// Start other Cortex components (ingester running on previous version).
ingester1 := e2ecortex.NewIngester("ingester-1", consul.NetworkHTTPEndpoint(), flagsForOldImage, "")
distributor := e2ecortex.NewDistributor("distributor", consul.NetworkHTTPEndpoint(), flagsForOldImage, "")
// Old ring didn't have /ready probe, use /ring instead.
distributor.SetReadinessProbe(e2e.NewReadinessProbe(distributor.HTTPPort(), "/ring", 200))
require.NoError(t, s.StartAndWaitReady(distributor, ingester1, tableManager))

// Wait until the first table-manager sync has completed, so that we're
// sure the tables have been created.
require.NoError(t, tableManager.WaitSumMetrics(e2e.Greater(0), "cortex_dynamo_sync_tables_seconds"))
require.NoError(t, s.StartAndWaitReady(distributor, ingester1))

// Wait until the distributor has updated the ring.
require.NoError(t, distributor.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total"))
Expand Down
4 changes: 2 additions & 2 deletions integration/chunks_storage_backends_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
// +build requires_docker

package main

Expand Down Expand Up @@ -55,7 +55,7 @@ func TestChunksStorageAllIndexBackends(t *testing.T) {

// Wait until the first table-manager sync has completed, so that we're
// sure the tables have been created.
require.NoError(t, tableManager.WaitSumMetrics(e2e.Greater(0), "cortex_dynamo_sync_tables_seconds"))
require.NoError(t, tableManager.WaitSumMetrics(e2e.Greater(0), "cortex_table_manager_sync_success_timestamp_seconds"))
require.NoError(t, s.Stop(tableManager))
}

Expand Down
2 changes: 1 addition & 1 deletion integration/configs.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
// +build requires_docker

package main

Expand Down
2 changes: 1 addition & 1 deletion integration/e2e/scenario_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The meaning we gave to this tag was "a test requiring Docker". Does it cause you any trouble keeping it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, my point is that it's not an integration test so technically this tag is wrong.

What if we change this tag to requires_docker? (:

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did that (:

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't care too much what it's called, but this feels like a change just for the sake of change. What if we have integration tests that don't use docker in the future? README.md is not updated.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you also update integration/README.md accordingly?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Idea behind using a tag here was to avoid running tests in integration dir when doing go test ./... from main Cortex package. I simply used tag name that matched the directory name.

Copy link
Contributor Author

@bwplotka bwplotka Mar 13, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but feel like this is change just for the change

Just seen this and... what? What do you mean? Why would I put more work on myself to improve something in Cortex for no reason? @pstibrany Sorry, I don't get it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just don't feel that renaming the tag was necessary, but ... whatever works. I don't care too much about the name as long as there is a tag in the first place.

// +build requires_docker

package e2e_test

Expand Down
16 changes: 11 additions & 5 deletions integration/e2e/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,7 @@ func sumValues(family *io_prometheus_client.MetricFamily) float64 {
return sum
}

// Equals is an isExpected function for WaitSumMetrics that returns true if given single sum is equals to given value.
func Equals(value float64) func(sums ...float64) bool {
return func(sums ...float64) bool {
if len(sums) != 1 {
Expand All @@ -470,6 +471,7 @@ func Equals(value float64) func(sums ...float64) bool {
}
}

// Greater is an isExpected function for WaitSumMetrics that returns true if given single sum is greater than given value.
func Greater(value float64) func(sums ...float64) bool {
return func(sums ...float64) bool {
if len(sums) != 1 {
Expand All @@ -479,6 +481,7 @@ func Greater(value float64) func(sums ...float64) bool {
}
}

// Less is an isExpected function for WaitSumMetrics that returns true if given single sum is less than given value.
func Less(value float64) func(sums ...float64) bool {
return func(sums ...float64) bool {
if len(sums) != 1 {
Expand All @@ -488,7 +491,7 @@ func Less(value float64) func(sums ...float64) bool {
}
}

// EqualsAmongTwo returns true if first sum is equal to the second.
// EqualsAmongTwo is an isExpected function for WaitSumMetrics that returns true if first sum is equal to the second.
// NOTE: Be careful on scrapes in between of process that changes two metrics. Those are
// usually not atomic.
func EqualsAmongTwo(sums ...float64) bool {
Expand All @@ -498,7 +501,7 @@ func EqualsAmongTwo(sums ...float64) bool {
return sums[0] == sums[1]
}

// GreaterAmongTwo returns true if first sum is greater than second.
// GreaterAmongTwo is an isExpected function for WaitSumMetrics that returns true if first sum is greater than second.
// NOTE: Be careful on scrapes in between of process that changes two metrics. Those are
// usually not atomic.
func GreaterAmongTwo(sums ...float64) bool {
Expand All @@ -508,7 +511,7 @@ func GreaterAmongTwo(sums ...float64) bool {
return sums[0] > sums[1]
}

// LessAmongTwo returns true if first sum is smaller than second.
// LessAmongTwo is an isExpected function for WaitSumMetrics that returns true if first sum is smaller than second.
// NOTE: Be careful on scrapes in between of process that changes two metrics. Those are
// usually not atomic.
func LessAmongTwo(sums ...float64) bool {
Expand All @@ -518,6 +521,8 @@ func LessAmongTwo(sums ...float64) bool {
return sums[0] < sums[1]
}

// WaitSumMetrics waits for at least one instance of each given metric names to be present and their sums, returning true
// when passed to given isExpected(...).
func (s *HTTPService) WaitSumMetrics(isExpected func(sums ...float64) bool, metricNames ...string) error {
sums := make([]float64, len(metricNames))

Expand All @@ -537,10 +542,11 @@ func (s *HTTPService) WaitSumMetrics(isExpected func(sums ...float64) bool, metr
sums[i] = 0.0

// Check if the metric is exported.
mf, ok := families[m]
if ok {
if mf, ok := families[m]; ok {
sums[i] = sumValues(mf)
continue
}
return errors.Errorf("metric %s not found in %s metric page", m, s.name)
}

if isExpected(sums...) {
Expand Down
4 changes: 3 additions & 1 deletion integration/e2e/service_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
// +build requires_docker

package e2e

Expand Down Expand Up @@ -107,6 +107,8 @@ metric_b_summary_count 1

require.NoError(t, s.WaitSumMetrics(LessAmongTwo, "metric_a", "metric_b"))
require.Error(t, s.WaitSumMetrics(LessAmongTwo, "metric_b", "metric_a"))

require.Error(t, s.WaitSumMetrics(Equals(0), "non_existing_metric"))
}

func TestWaitSumMetric_Nan(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion integration/getting_started_single_process_config_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
// +build requires_docker

package main

Expand Down
4 changes: 2 additions & 2 deletions integration/ingester_flush_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
// +build requires_docker

package main

Expand Down Expand Up @@ -42,7 +42,7 @@ func TestIngesterFlushWithChunksStorage(t *testing.T) {

// Wait until the first table-manager sync has completed, so that we're
// sure the tables have been created.
require.NoError(t, tableManager.WaitSumMetrics(e2e.Greater(0), "cortex_dynamo_sync_tables_seconds"))
require.NoError(t, tableManager.WaitSumMetrics(e2e.Greater(0), "cortex_table_manager_sync_success_timestamp_seconds"))

// Wait until both the distributor and querier have updated the ring.
require.NoError(t, distributor.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total"))
Expand Down
4 changes: 2 additions & 2 deletions integration/ingester_hand_over_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
// +build requires_docker

package main

Expand Down Expand Up @@ -34,7 +34,7 @@ func TestIngesterHandOverWithChunksStorage(t *testing.T) {

// Wait until the first table-manager sync has completed, so that we're
// sure the tables have been created.
require.NoError(t, tableManager.WaitSumMetrics(e2e.Greater(0), "cortex_dynamo_sync_tables_seconds"))
require.NoError(t, tableManager.WaitSumMetrics(e2e.Greater(0), "cortex_table_manager_sync_success_timestamp_seconds"))
})
}

Expand Down
2 changes: 1 addition & 1 deletion integration/integration_memberlist_single_binary_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
// +build requires_docker

package main

Expand Down
4 changes: 2 additions & 2 deletions integration/metrics_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
// +build requires_docker

package main

Expand Down Expand Up @@ -44,7 +44,7 @@ func TestExportedMetrics(t *testing.T) {

// Wait until the first table-manager sync has completed, so that we're
// sure the tables have been created.
require.NoError(t, tableManager.WaitSumMetrics(e2e.Greater(0), "cortex_dynamo_sync_tables_seconds"))
require.NoError(t, tableManager.WaitSumMetrics(e2e.Greater(0), "cortex_table_manager_sync_success_timestamp_seconds"))

// Wait until both the distributor and querier have updated the ring.
require.NoError(t, distributor.WaitSumMetrics(e2e.Equals(512), "cortex_ring_tokens_total"))
Expand Down
6 changes: 3 additions & 3 deletions integration/querier_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
// +build requires_docker

package main

Expand Down Expand Up @@ -83,9 +83,9 @@ func TestQuerierWithBlocksStorage(t *testing.T) {
// Wait until the TSDB head is compacted and shipped to the storage.
// The shipped block contains the 1st series, while the 2nd series is in the head.
require.NoError(t, ingester.WaitSumMetrics(e2e.Equals(1), "cortex_ingester_shipper_uploads_total"))
require.NoError(t, ingester.WaitSumMetrics(e2e.Equals(1), "cortex_ingester_memory_series"))
require.NoError(t, ingester.WaitSumMetrics(e2e.Equals(2), "cortex_ingester_memory_series_created_total"))
require.NoError(t, ingester.WaitSumMetrics(e2e.Equals(1), "cortex_ingester_memory_series_removed_total"))
require.NoError(t, ingester.WaitSumMetrics(e2e.Equals(1), "cortex_ingester_memory_series"))

// Push another series to further compact another block and delete the first block
// due to expired retention.
Expand All @@ -97,9 +97,9 @@ func TestQuerierWithBlocksStorage(t *testing.T) {
require.Equal(t, 200, res.StatusCode)

require.NoError(t, ingester.WaitSumMetrics(e2e.Equals(2), "cortex_ingester_shipper_uploads_total"))
require.NoError(t, ingester.WaitSumMetrics(e2e.Equals(1), "cortex_ingester_memory_series"))
require.NoError(t, ingester.WaitSumMetrics(e2e.Equals(3), "cortex_ingester_memory_series_created_total"))
require.NoError(t, ingester.WaitSumMetrics(e2e.Equals(2), "cortex_ingester_memory_series_removed_total"))
require.NoError(t, ingester.WaitSumMetrics(e2e.Equals(1), "cortex_ingester_memory_series"))

// Wait until the querier has synched the new uploaded blocks.
require.NoError(t, querier.WaitSumMetrics(e2e.Equals(2), "cortex_querier_bucket_store_blocks_loaded"))
Expand Down
6 changes: 3 additions & 3 deletions integration/query_frontend_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
// +build requires_docker

package main

Expand Down Expand Up @@ -52,7 +52,7 @@ func TestQueryFrontendWithChunksStorageViaFlags(t *testing.T) {

// Wait until the first table-manager sync has completed, so that we're
// sure the tables have been created.
require.NoError(t, tableManager.WaitSumMetrics(e2e.Greater(0), "cortex_dynamo_sync_tables_seconds"))
require.NoError(t, tableManager.WaitSumMetrics(e2e.Greater(0), "cortex_table_manager_sync_success_timestamp_seconds"))

return "", ChunksStorageFlags
})
Expand All @@ -71,7 +71,7 @@ func TestQueryFrontendWithChunksStorageViaConfigFile(t *testing.T) {

// Wait until the first table-manager sync has completed, so that we're
// sure the tables have been created.
require.NoError(t, tableManager.WaitSumMetrics(e2e.Greater(0), "cortex_dynamo_sync_tables_seconds"))
require.NoError(t, tableManager.WaitSumMetrics(e2e.Greater(0), "cortex_table_manager_sync_success_timestamp_seconds"))

return cortexConfigFile, e2e.EmptyFlags()
})
Expand Down
2 changes: 1 addition & 1 deletion integration/util.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build integration
// +build requires_docker

package main

Expand Down
11 changes: 9 additions & 2 deletions pkg/alertmanager/multitenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ const (
// a URL derived from Config.AutoWebhookRoot
autoWebhookURL = "http://internal.monitor"

configStatusValid = "valid"
configStatusInvalid = "invalid"

statusPage = `
<!doctype html>
<html>
Expand Down Expand Up @@ -76,6 +79,10 @@ var (
)

func init() {
// Ensure the metric values are initialized.
totalConfigs.WithLabelValues(configStatusInvalid).Set(0)
totalConfigs.WithLabelValues(configStatusValid).Set(0)

prometheus.MustRegister(totalConfigs)
statusTemplate = template.Must(template.New("statusPage").Funcs(map[string]interface{}{
"state": func(enabled bool) string {
Expand Down Expand Up @@ -313,8 +320,8 @@ func (am *MultitenantAlertmanager) syncConfigs(cfgs map[string]alerts.AlertConfi
level.Info(am.logger).Log("msg", "deactivated per-tenant alertmanager", "user", user)
}
}
totalConfigs.WithLabelValues("invalid").Set(float64(invalid))
totalConfigs.WithLabelValues("valid").Set(float64(len(am.cfgs) - invalid))
totalConfigs.WithLabelValues(configStatusInvalid).Set(float64(invalid))
totalConfigs.WithLabelValues(configStatusValid).Set(float64(len(am.cfgs) - invalid))
}

func (am *MultitenantAlertmanager) transformConfig(userID string, amConfig *amconfig.Config) (*amconfig.Config, error) {
Expand Down
23 changes: 18 additions & 5 deletions pkg/chunk/table_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ const (
)

type tableManagerMetrics struct {
syncTableDuration *prometheus.HistogramVec
tableCapacity *prometheus.GaugeVec
createFailures prometheus.Gauge
deleteFailures prometheus.Gauge
syncTableDuration *prometheus.HistogramVec
tableCapacity *prometheus.GaugeVec
createFailures prometheus.Gauge
deleteFailures prometheus.Gauge
lastSuccessfulSync prometheus.Gauge
}

func newTableManagerMetrics(r prometheus.Registerer) *tableManagerMetrics {
Expand Down Expand Up @@ -61,12 +62,19 @@ func newTableManagerMetrics(r prometheus.Registerer) *tableManagerMetrics {
Help: "Number of table deletion failures during the last table-manager reconciliation",
})

m.lastSuccessfulSync = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "cortex",
Name: "table_manager_sync_success_timestamp_seconds",
Help: "Timestamp of the last successful table manager sync.",
})

if r != nil {
r.MustRegister(
m.syncTableDuration,
m.tableCapacity,
m.createFailures,
m.deleteFailures,
m.lastSuccessfulSync,
)
}

Expand Down Expand Up @@ -259,7 +267,12 @@ func (m *TableManager) SyncTables(ctx context.Context) error {
return err
}

return m.updateTables(ctx, toCheckThroughput)
if err := m.updateTables(ctx, toCheckThroughput); err != nil {
return err
}

m.metrics.lastSuccessfulSync.SetToCurrentTime()
return nil
}

func (m *TableManager) calculateExpectedTables() []TableDesc {
Expand Down