Skip to content

Commit

Permalink
Merge #52136
Browse files Browse the repository at this point in the history
52136: kvserver: fix tenant metrics for RHS of split r=ajwerner a=ajwerner

In the change which introduced tenant storage metrics we were erroneously
applying the MVCC delta to the LHS rather than the RHS of the split. This
wasn't so obvious because prior to the change we used the LHS to get a
handle to the store's metrics because we knew they were the same store.

I've manually verified that this fixes the odd behavior we were seeing.

Release note: None

Co-authored-by: Andrew Werner <ajwerner@cockroachlabs.com>
  • Loading branch information
craig[bot] and ajwerner committed Jul 31, 2020
2 parents a864849 + 7c61eda commit 0df9caf
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 5 deletions.
130 changes: 130 additions & 0 deletions pkg/kv/kvserver/client_tenant_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver_test

import (
"bufio"
"bytes"
"context"
"regexp"
"strconv"
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/require"
)

// TestTenantsStorageMetrics ensures that tenant storage metrics are properly
// set upon split. There's two interesting cases:
//
// 1) The common case where the RHS and LHS of the split are co-located
// 2) The rare case where the RHS of the split has already been removed from
// the store by the time the LHS applies the split.
//
// This test at time of writing only deals with ensuring that 1) is covered.
// TODO(ajwerner): add a test for 2).
func TestTenantsStorageMetricsOnSplit(t *testing.T) {
defer leaktest.AfterTest(t)()

s, _, db := serverutils.StartServer(t, base.TestServerArgs{})

ctx := context.Background()
store, err := s.GetStores().(*kvserver.Stores).GetStore(s.GetFirstStoreID())
require.NoError(t, err)
defer store.Stopper().Stop(ctx)

tenantID := roachpb.MakeTenantID(10)
codec := keys.MakeSQLCodec(tenantID)

tenantPrefix := codec.TenantPrefix()
_, _, err = s.SplitRange(tenantPrefix)
require.NoError(t, err)

splitKey := codec.TablePrefix(42)
_, _, err = s.SplitRange(splitKey)
require.NoError(t, err)

require.NoError(t, db.Put(ctx, codec.TablePrefix(41), "bazbax"))
require.NoError(t, db.Put(ctx, splitKey, "foobar"))

// We want it to be the case that the MVCC stats for the individual ranges
// of our tenant line up with the metrics for the tenant.
check := func() error {
var aggregateStats enginepb.MVCCStats
var seen int
store.VisitReplicas(func(replica *kvserver.Replica) (wantMore bool) {
ri := replica.State()
if ri.TenantID != tenantID.ToUint64() {
return true
}
seen++
aggregateStats.Add(*ri.Stats)
return true
})
ex := metric.MakePrometheusExporter()
ex.ScrapeRegistry(store.Registry(), true /* includeChildMetrics */)
var in bytes.Buffer
if err := ex.PrintAsText(&in); err != nil {
t.Fatalf("failed to print prometheus data: %v", err)
}
if seen != 2 {
return errors.Errorf("expected to see two replicas for our tenant, saw %d", seen)
}
sc := bufio.NewScanner(&in)
re := regexp.MustCompile(`^(\w+)\{.*,tenant_id="` + tenantID.String() + `"\} (\d+)`)
metricsToVal := map[string]int64{
"gcbytesage": aggregateStats.GCBytesAge,
"intentage": aggregateStats.IntentAge,
"livebytes": aggregateStats.LiveBytes,
"livecount": aggregateStats.LiveCount,
"keybytes": aggregateStats.KeyBytes,
"keycount": aggregateStats.KeyCount,
"valbytes": aggregateStats.ValBytes,
"valcount": aggregateStats.ValCount,
"intentbytes": aggregateStats.IntentBytes,
"intentcount": aggregateStats.IntentCount,
"sysbytes": aggregateStats.SysBytes,
"syscount": aggregateStats.SysCount,
}
for sc.Scan() {
matches := re.FindAllStringSubmatch(sc.Text(), 1)
if matches == nil {
continue
}
metric, valStr := matches[0][1], matches[0][2]
exp, ok := metricsToVal[matches[0][1]]
if !ok {
continue
}
val, err := strconv.ParseInt(valStr, 10, 64)
require.NoError(t, err)
if exp != val {
return errors.Errorf("value for %s %d != %d", metric, val, exp)
}
delete(metricsToVal, metric)
}
if len(metricsToVal) != 0 {
return errors.Errorf("did not see all metrics, still expect %v", metricsToVal)
}
return nil
}
testutils.SucceedsSoon(t, check)

}
15 changes: 10 additions & 5 deletions pkg/kv/kvserver/store_split.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,9 @@ func splitPreApply(
}

// splitPostApply is the part of the split trigger which coordinates the actual
// split with the Store. Requires that Replica.raftMu is held.
// split with the Store. Requires that Replica.raftMu is held. The deltaMS are
// the MVCC stats which apply to the RHS and have already been removed from the
// LHS.
func splitPostApply(
ctx context.Context, deltaMS enginepb.MVCCStats, split *roachpb.SplitTrigger, r *Replica,
) {
Expand All @@ -173,10 +175,13 @@ func splitPostApply(
}

// Update store stats with difference in stats before and after split.
if tenantID, ok := r.TenantID(); ok {
r.store.metrics.addMVCCStats(ctx, tenantID, deltaMS)
} else {
log.Fatalf(ctx, "%s: found replica which is part of a split without a valid tenant ID", r)
if rightReplOrNil != nil {
if tenantID, ok := rightReplOrNil.TenantID(); ok {
rightReplOrNil.store.metrics.addMVCCStats(ctx, tenantID, deltaMS)
} else {
log.Fatalf(ctx, "%s: found replica which is RHS of a split "+
"without a valid tenant ID", rightReplOrNil)
}
}

now := r.store.Clock().Now()
Expand Down

0 comments on commit 0df9caf

Please sign in to comment.