Skip to content

Commit

Permalink
kv: support unreplicated locks, hook up SELECT FOR UPDATE
Browse files Browse the repository at this point in the history
Closes cockroachdb#40205.
Informs cockroachdb#41720.

This change teaches the KV client and the KV API about unreplicated locks.
It then adds a KeyLocking mode to ScanRequest and ReverseScanRequest, which
allows their users to select the locking strength that they would like the
scan to use. This locking strength defaults to None, which corresponds to
the current behavior. However, some users will want to acquire locks on each
row scanned, which is now possible by setting the locking strength to a
stronger level. For now, only the Exclusive strength is supported.

The change then revisits SQL's row-level locking support, which is supported
all the way down to the row fetcher for implicit (e.g. UPDATE) and explicit
(e.g. SELECT ... FOR UPDATE) upgrade locking. The change uses the new
key-locking functionality in the KV API to hook up row-level locking,
completing the integration of SELECT FOR UPDATE with the KV layer and,
in particular, the new lock-table structure.

cockroachdb#43775 described the three main
benefits of this change:
- higher throughput under contention
- lower latency and improved fairness under contention
- a reduction in transaction retries under contention

I've revisited those results a few times in the last two months and seen that
the results continue to hold, and in some cases they have improved. I intend
to update this PR with a more complete analysis of its impact on those three
areas.

Release note (sql change): SELECT FOR UPDATE now hooks into a new
leaseholder-only locking mechanism. This allows the feature to be used
to improve performance of transactions that read, modify, and write to
contended rows. Similarly, UPDATE statements now use this new
mechanism by default, meaning that their performance under contention is
improved.
  • Loading branch information
nvanbenschoten committed Mar 4, 2020
1 parent ae0c568 commit 99cfc3e
Show file tree
Hide file tree
Showing 35 changed files with 1,810 additions and 1,131 deletions.
88 changes: 80 additions & 8 deletions c-deps/libroach/protos/roachpb/api.pb.cc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 43 additions & 0 deletions c-deps/libroach/protos/roachpb/api.pb.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion c-deps/libroach/protos/roachpb/data.pb.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
<tr><td><code>kv.replication_reports.interval</code></td><td>duration</td><td><code>1m0s</code></td><td>the frequency for generating the replication_constraint_stats, replication_stats_report and replication_critical_localities reports (set to 0 to disable)</td></tr>
<tr><td><code>kv.snapshot_rebalance.max_rate</code></td><td>byte size</td><td><code>8.0 MiB</code></td><td>the rate limit (bytes/sec) to use for rebalance and upreplication snapshots</td></tr>
<tr><td><code>kv.snapshot_recovery.max_rate</code></td><td>byte size</td><td><code>8.0 MiB</code></td><td>the rate limit (bytes/sec) to use for recovery snapshots</td></tr>
<tr><td><code>kv.transaction.max_intents_bytes</code></td><td>integer</td><td><code>262144</code></td><td>maximum number of bytes used to track write intents in transactions</td></tr>
<tr><td><code>kv.transaction.max_intents_bytes</code></td><td>integer</td><td><code>262144</code></td><td>maximum number of bytes used to track locks in transactions</td></tr>
<tr><td><code>kv.transaction.max_refresh_spans_bytes</code></td><td>integer</td><td><code>256000</code></td><td>maximum number of bytes used to track refresh spans in serializable transactions</td></tr>
<tr><td><code>server.clock.forward_jump_check_enabled</code></td><td>boolean</td><td><code>false</code></td><td>if enabled, forward clock jumps > max_offset/2 will cause a panic</td></tr>
<tr><td><code>server.clock.persist_upper_bound_interval</code></td><td>duration</td><td><code>0s</code></td><td>the interval between persisting the wall time upper bound of the clock. The clock does not generate a wall time greater than the persisted timestamp and will panic if it sees a wall time greater than this value. When cockroach starts, it waits for the wall time to catch-up till this persisted timestamp. This guarantees monotonic wall time across server restarts. Not setting this or setting a value of 0 disables this feature.</td></tr>
Expand Down
2 changes: 1 addition & 1 deletion pkg/ccl/changefeedccl/kvfeed/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func (p *scanRequestScanner) exportSpan(
for remaining := span; ; {
start := timeutil.Now()
b := txn.NewBatch()
r := roachpb.NewScan(remaining.Key, remaining.EndKey).(*roachpb.ScanRequest)
r := roachpb.NewScan(remaining.Key, remaining.EndKey, false /* forUpdate */).(*roachpb.ScanRequest)
r.ScanFormat = roachpb.BATCH_RESPONSE
b.Header.TargetBytes = targetBytesPerScan
// NB: We use a raw request rather than the Scan() method because we want
Expand Down
34 changes: 29 additions & 5 deletions pkg/internal/client/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ func (b *Batch) Inc(key interface{}, value int64) {
b.initResult(1, 1, notRaw, nil)
}

func (b *Batch) scan(s, e interface{}, isReverse bool) {
func (b *Batch) scan(s, e interface{}, isReverse, forUpdate bool) {
begin, err := marshalKey(s)
if err != nil {
b.initResult(0, 0, notRaw, err)
Expand All @@ -532,9 +532,9 @@ func (b *Batch) scan(s, e interface{}, isReverse bool) {
return
}
if !isReverse {
b.appendReqs(roachpb.NewScan(begin, end))
b.appendReqs(roachpb.NewScan(begin, end, forUpdate))
} else {
b.appendReqs(roachpb.NewReverseScan(begin, end))
b.appendReqs(roachpb.NewReverseScan(begin, end, forUpdate))
}
b.initResult(1, 0, notRaw, nil)
}
Expand All @@ -547,7 +547,19 @@ func (b *Batch) scan(s, e interface{}, isReverse bool) {
//
// key can be either a byte slice or a string.
func (b *Batch) Scan(s, e interface{}) {
b.scan(s, e, false)
b.scan(s, e, false /* isReverse */, false /* forUpdate */)
}

// ScanForUpdate fetches the key/value pairs between s (inclusive) and e
// (exclusive) in ascending order, acquiring an unreplicated, exclusive lock
// on each key that is returned.
//
// One new result is appended to the batch. It contains the "rows" (each row
// is a single key/value pair), and its Result.Err reports success or failure.
//
// s and e can each be either a byte slice or a string.
func (b *Batch) ScanForUpdate(s, e interface{}) {
	// Forward scan that locks the keys it returns.
	const (
		isReverse = false
		forUpdate = true
	)
	b.scan(s, e, isReverse, forUpdate)
}

// ReverseScan retrieves the rows between begin (inclusive) and end (exclusive)
Expand All @@ -558,7 +570,19 @@ func (b *Batch) Scan(s, e interface{}) {
//
// key can be either a byte slice or a string.
func (b *Batch) ReverseScan(s, e interface{}) {
b.scan(s, e, true)
b.scan(s, e, true /* isReverse */, false /* forUpdate */)
}

// ReverseScanForUpdate fetches the key/value pairs between s (inclusive) and
// e (exclusive) in descending order, acquiring an unreplicated, exclusive
// lock on each key that is returned.
//
// One new result is appended to the batch. It contains the "rows" (each row
// is a single key/value pair), and its Result.Err reports success or failure.
//
// s and e can each be either a byte slice or a string.
func (b *Batch) ReverseScanForUpdate(s, e interface{}) {
	// Reverse scan that locks the keys it returns.
	const (
		isReverse = true
		forUpdate = true
	)
	b.scan(s, e, isReverse, forUpdate)
}

// Del deletes one or more keys.
Expand Down
Loading

0 comments on commit 99cfc3e

Please sign in to comment.