cockroachdb · tbg · Oct 28, 2021 · Mar 4, 2022 · Mar 4, 2022 · Mar 4, 2022
@@ -0,0 +1,147 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package metric
+
+import (
+	"context"
+	"time"
+)
+
+// A Timing is a container that stores Events resulting from a single-threaded
+// execution of an operation, which it thus breaks up into contiguous (i.e.
+// non-overlapping and without gaps) time intervals. The use of a Timing
+// integrates easily into existing code paths and interacts well with both
+// metrics and tracing.
+//
+// With each event added, the caller learns the duration elapsed since the last
+// event (or another previous event of choice), which facilitates recording the
+// durations of sub-operations to metrics. A "middleware" OnEvent can be set up
+// to tie into tracing, so that existing calls to `log.Event` can be lifted to
+// calls to `Timing.Event`.
+type Timing struct {
+	Now func() int64
+	// OnEvent is called every time Event is called. It will be invoked directly
+	// from Event, i.e. logging in OnEvent needs to skip two stack frames to get
+	// to the caller of Event.
+	OnEvent func(context.Context, interface{}, time.Time)
+
+	ents []entry
+}
+
+type entry struct {
+	ts int64 // unix nanos
+	tr interface{}
+}
+
+// Reset resets the Timing. This prepares it for recording a new
+// operation while allowing re-use of underlying memory and configuration.
+func (tm *Timing) Reset() {
+	tm.ents = append(tm.ents[:0], entry{
+		ts: tm.Now(),
+	})
+}
+
+func (tm *Timing) lastIdx() int {
+	return len(tm.ents) - 1
+}
+
+type End struct {
+	Event interface{}
+}
+
+// Event records an event to the timing. It returns the duration
+// elapsed since the last Event, and the internal index assigned
+// to the new event (for use in Between). The same event may be
+// recorded multiple times, which is useful in case of retry
+// loops.
+//
+// Note that the returned duration is only meaningful if the preceding
+// event is stable. In the following code, the likelyhood of a third event
+// being present (or slipping in down the road) is high and would lead to
+// under-reporting of the duration attributed to 'pouncing':
+//
+//   dur := tm.Event(ctx, "pouncing ends")
+//   doSomethingIncludingMaybeAddAnotherEvent(tm)
+//   fmt.Printf("pouncing took %s!", dur)
+//
+// The use of Between is generally preferable to avoid this problem in all but
+// trivially correct code.
+//
+// TODO update comment
+func (tm *Timing) Event(ctx context.Context, tr interface{}) int {
+	ts := tm.Now()
+	tm.ents = append(tm.ents, entry{
+		ts: ts,
+		tr: tr,
+	})
+	//tm.OnEvent(ctx, tr, ts)
+	return tm.lastIdx()
+}
+
+// Between returns the duration elapsed between two calls to Event as identified
+// by their respective indexes.
+func (tm *Timing) Between(from, to int) time.Duration {
+	return time.Duration(tm.ents[to].ts - tm.ents[from].ts) // TODO overflow checks
+}
+
+func (tm *Timing) Duration() time.Duration {
+	last := tm.lastIdx()
+	if last == 0 {
+		return 0
+	}
+	return tm.Between(0, last)
+}
+
+func (tm *Timing) Summary() map[interface{}]time.Duration {
+	res := make(map[interface{}]time.Duration)
+	m := map[interface{}]int{} // idx, key is never &End{X}
+
+	for i := range tm.ents {
+		// TODO: also compute the duration for which there isn't a single open
+		// interval. This is "time between the cracks". We may double-account time
+		// when intervals overlap, but if no interval is open, that's something that
+		// should go in a separate bucket and be reported.
+		el := &tm.ents[i]
+		e, ok := el.tr.(*End)
+		if !ok {
+			idx, open := m[el.tr]
+			if open {
+				// Invalid nesting, close current interval and start new one.
+				res[el.tr] += tm.Between(idx, i)
+				// Fall through.
+			}
+			m[el.tr] = i
+			continue
+		}
+		// Closing an existing interval. If it isn't open, that's a programming
+		// error but we'll handle it gracefully by ignoring it.
+		idx, open := m[e.Event]
+		if !open {
+			continue // ignore
+		}
+		res[e.Event] += tm.Between(idx, i)
+		delete(m, e.Event)
+	}
+	for e, idx := range m {
+		// Close any leftover events.
+		res[e] += tm.Between(idx, tm.lastIdx())
+	}
+	return res
+}
+
+// TODO(tbg): provide a method to summarize the timings.
+// This could power a granular per-range breakdown of execution
+// timings via a map[struct{A, B interface{}}ewma.MovingAverage.
+// where there is an entry for any events A and B that were ever
+// observed in succession. We should only do this if we know that
+// the set of possible events is small, for example if they are
+// guaranteed to come from a small set of singletons (as is the
+// envisioned reality).
@@ -0,0 +1,86 @@
+// Copyright 2022 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package metric
+
+import (
+	"context"
+	"math/rand"
+	"testing"
+
+	"github.com/cockroachdb/cockroach/pkg/util/hlc"
+)
+
+func TestTiming(t *testing.T) {
+	ctx := context.Background()
+
+	const (
+		evStart = iota
+		evLeaseStart
+		evContentionStart
+		evSequenceStart
+		evEvalStart
+		evReplStart
+	)
+	mc := hlc.NewManualClock(1)
+	tm := &Timing{Now: func() int64 {
+		return mc.UnixNano()
+	}}
+	r := rand.New(rand.NewSource(123))
+	_ = r
+	tick := func() {
+		//nanos := r.Int63n(100)
+		const nanos = 1000
+		mc.Increment(nanos)
+	}
+
+	tm.Event(ctx, evStart)
+	tick()
+	tm.Event(ctx, &End{evStart})
+	tick()
+	tick()
+	tick()
+	tm.Event(ctx, evStart)
+	tick()
+	tm.Event(ctx, &End{evStart})
+
+	if false {
+		tm.Event(ctx, evStart)
+		tick()
+		tm.Event(ctx, &End{evLeaseStart})
+		tick() // where is this accounted for? Can return "gaps" from summary?
+		tm.Event(ctx, evLeaseStart)
+		tick()
+		tm.Event(ctx, &End{evLeaseStart})
+		tick()
+		tm.Event(ctx, evSequenceStart)
+		tick()
+		tm.Event(ctx, &End{evSequenceStart})
+		tick()
+		tm.Event(ctx, evContentionStart)
+		tick()
+		tm.Event(ctx, &End{evContentionStart})
+		tick()
+		tm.Event(ctx, evSequenceStart)
+		tick()
+		tm.Event(ctx, &End{evSequenceStart})
+		tick()
+		tm.Event(ctx, evEvalStart)
+		tick()
+		tm.Event(ctx, &End{evEvalStart})
+		tick()
+		tm.Event(ctx, evReplStart)
+		tick()
+		tm.Event(ctx, &End{evReplStart})
+	}
+
+	//require.EqualValues(t, mc.UnixNano(), 1+tm.Duration())
+	t.Error(tm.Summary())
+}