From 9c64cf6fe204344d10825085110072cc22e344c4 Mon Sep 17 00:00:00 2001 From: Andrew Werner Date: Tue, 4 Aug 2020 14:56:29 -0400 Subject: [PATCH] engine: set the MVCC timestamp on reads due to historical intents Before this commit, we'd return a zero-value MVCC timestamp when reading an intent from the intent history. This was problematic because elsewhere in the code we assume that we will always get a non-zero MVCC timestamp when a read returns a value. This is especially bizarre given that a read of the latest intent will return its write timestamp. The semantics here are such that we'll return the timestamp of the MVCC metadata for the row. I think this is the most reasonable thing to do as that timestamp ought to reflect the timestamp we return when we return the current intent and furthermore is the only timestamp we really have around. We could return the transactions current read or write timestamp but those both seem like worse choices. It's also worth noting that in the case where we need to read a non-zero value, we don't really care what that value is and the fact that we are reading this intent itself is somewhat suspect. That being said, I still think we should make this change in addition to any change made to prevent the code referenced in #49266 from needing it. Fixes #49266. Informs #50102. Release note: None --- c-deps/libroach/mvcc.h | 18 ++++++++-- pkg/storage/pebble_mvcc_scanner.go | 17 +++++++++- .../testdata/mvcc_histories/ignored_seq_nums | 34 +++++++++---------- pkg/storage/testdata/mvcc_histories/max_keys | 18 +++++----- .../testdata/mvcc_histories/target_bytes | 12 +++---- 5 files changed, 64 insertions(+), 35 deletions(-) diff --git a/c-deps/libroach/mvcc.h b/c-deps/libroach/mvcc.h index 5dce5b760112..0e41bffb0f7f 100644 --- a/c-deps/libroach/mvcc.h +++ b/c-deps/libroach/mvcc.h @@ -255,7 +255,21 @@ template class mvccScanner { rocksdb::Slice value = intent.value(); if (value.size() > 0 || tombstones_) { - kvs_->Put(cur_raw_key_, value); + // If we're adding a value due to a previous intent, as indicated by the + // zero-valued timestamp, we want to populate the timestamp as of current + // metaTimestamp. Note that this may be controversial as this maybe be + // neither the write timestamp when this intent was written. However, this + // was the only case in which a value could have been returned from a read + // without an MVCC timestamp. + if (cur_timestamp_ == kZeroTimestamp) { + auto meta_timestamp = meta_.timestamp(); + auto key = EncodeKey(cur_key_, + meta_timestamp.wall_time(), + meta_timestamp.logical()); + kvs_->Put(key, value); + } else { + kvs_->Put(cur_raw_key_, value); + } } return true; } @@ -296,7 +310,7 @@ template class mvccScanner { // version's timestamp and the scanner has been configured // to throw a write too old error on more recent versions. // Merge the current timestamp with the maximum timestamp - // we've seen so we know to return an error, but then keep + // we've seen so we know to return an error, but then keep // scanning so that we can return the largest possible time. if (cur_timestamp_ > most_recent_timestamp_) { most_recent_timestamp_ = cur_timestamp_; diff --git a/pkg/storage/pebble_mvcc_scanner.go b/pkg/storage/pebble_mvcc_scanner.go index f69afc6cbd7c..4a0fbed5206a 100644 --- a/pkg/storage/pebble_mvcc_scanner.go +++ b/pkg/storage/pebble_mvcc_scanner.go @@ -572,7 +572,22 @@ func (p *pebbleMVCCScanner) addAndAdvance(val []byte) bool { // Don't include deleted versions len(val) == 0, unless we've been instructed // to include tombstones in the results. if len(val) > 0 || p.tombstones { - p.results.put(p.curKey, val) + // If we're adding a value due to a previous intent, as indicated by the + // zero-valued timestamp, we want to populate the timestamp as of current + // metaTimestamp. Note that this may be controversial as this maybe be + // neither the write timestamp when this intent was written. However, this + // was the only case in which a value could have been returned from a read + // without an MVCC timestamp. + if p.curKey.Timestamp.IsEmpty() { + key := MVCCKey{ + Key: p.curKey.Key, + Timestamp: hlc.Timestamp(p.meta.Timestamp), + } + p.results.put(key, val) + } else { + p.results.put(p.curKey, val) + } + if p.targetBytes > 0 && p.results.bytes >= p.targetBytes { // When the target bytes are met or exceeded, stop producing more // keys. We implement this by reducing maxKeys to the current diff --git a/pkg/storage/testdata/mvcc_histories/ignored_seq_nums b/pkg/storage/testdata/mvcc_histories/ignored_seq_nums index 51100c7a6573..952b20abe0fe 100644 --- a/pkg/storage/testdata/mvcc_histories/ignored_seq_nums +++ b/pkg/storage/testdata/mvcc_histories/ignored_seq_nums @@ -39,12 +39,12 @@ with t=A txn_ignore_seqs seqs=(29-30) get k=k ---- -scan: "k" -> /BYTES/b @0,0 +scan: "k" -> /BYTES/b @0.000000011,0 scan: "k/10" -> /BYTES/10 @0.000000011,0 scan: "k/20" -> /BYTES/20 @0.000000011,0 -get: "k" -> /BYTES/b @0,0 -get: "k" -> /BYTES/b @0,0 -get: "k" -> /BYTES/b @0,0 +get: "k" -> /BYTES/b @0.000000011,0 +get: "k" -> /BYTES/b @0.000000011,0 +get: "k" -> /BYTES/b @0.000000011,0 >> at end: txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=0.000000011,0 min=0,0 seq=40} lock=true stat=PENDING rts=0.000000011,0 wto=false max=0,0 isn=1 @@ -87,9 +87,9 @@ with t=A scan k=k end=-k get k=k ---- -scan: "k" -> /BYTES/b @0,0 +scan: "k" -> /BYTES/b @0.000000011,0 scan: "k/20" -> /BYTES/20 @0.000000011,0 -get: "k" -> /BYTES/b @0,0 +get: "k" -> /BYTES/b @0.000000011,0 >> at end: txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=0.000000011,0 min=0,0 seq=40} lock=true stat=PENDING rts=0.000000011,0 wto=false max=0,0 isn=2 @@ -103,9 +103,9 @@ with t=A scan k=k end=-k get k=k ---- -scan: "k" -> /BYTES/a @0,0 +scan: "k" -> /BYTES/a @0.000000011,0 scan: "k/10" -> /BYTES/10 @0.000000011,0 -get: "k" -> /BYTES/a @0,0 +get: "k" -> /BYTES/a @0.000000011,0 >> at end: txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=0.000000011,0 min=0,0 seq=12} lock=true stat=PENDING rts=0.000000011,0 wto=false max=0,0 isn=1 @@ -119,9 +119,9 @@ with t=A scan k=k end=-k get k=k ---- -scan: "k" -> /BYTES/b @0,0 +scan: "k" -> /BYTES/b @0.000000011,0 scan: "k/20" -> /BYTES/20 @0.000000011,0 -get: "k" -> /BYTES/b @0,0 +get: "k" -> /BYTES/b @0.000000011,0 >> at end: txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=0.000000011,0 min=0,0 seq=22} lock=true stat=PENDING rts=0.000000011,0 wto=false max=0,0 isn=1 @@ -135,10 +135,10 @@ with t=A scan k=k end=-k get k=k ---- -scan: "k" -> /BYTES/b @0,0 +scan: "k" -> /BYTES/b @0.000000011,0 scan: "k/10" -> /BYTES/10 @0.000000011,0 scan: "k/20" -> /BYTES/20 @0.000000011,0 -get: "k" -> /BYTES/b @0,0 +get: "k" -> /BYTES/b @0.000000011,0 >> at end: txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=0.000000011,0 min=0,0 seq=32} lock=true stat=PENDING rts=0.000000011,0 wto=false max=0,0 isn=1 @@ -159,8 +159,8 @@ with t=A get k=k ---- meta: "k" -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=0.000000011,0 min=0,0 seq=30} ts=0.000000011,0 del=false klen=12 vlen=6 ih={{10 /BYTES/a}{20 /BYTES/b}} -get: "k" -> /BYTES/b @0,0 -get: "k" -> /BYTES/b @0,0 +get: "k" -> /BYTES/b @0.000000011,0 +get: "k" -> /BYTES/b @0.000000011,0 meta: "k" -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=0.000000014,0 min=0,0 seq=20} ts=0.000000014,0 del=false klen=12 vlen=6 ih={{10 /BYTES/a}} get: "k" -> /BYTES/b @0.000000014,0 >> at end: @@ -184,10 +184,10 @@ with t=A scan k=k end=-k get k=k ---- -scan: "k" -> /BYTES/a @0,0 +scan: "k" -> /BYTES/a @0.000000014,0 scan: "k/10" -> /BYTES/10 @0.000000011,0 scan: "k/30" -> /BYTES/30 @0.000000011,0 -get: "k" -> /BYTES/a @0,0 +get: "k" -> /BYTES/a @0.000000014,0 >> at end: txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=0.000000014,0 min=0,0 seq=40} lock=true stat=PENDING rts=0.000000011,0 wto=false max=0,0 isn=1 @@ -339,7 +339,7 @@ with t=C resolve_intent k=m status=COMMITTED ---- meta: "m" -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=0.000000030,0 min=0,0 seq=30} ts=0.000000030,0 del=false klen=12 vlen=6 ih={{10 /BYTES/a}{20 /BYTES/b}} -get: "m" -> /BYTES/a @0,0 +get: "m" -> /BYTES/a @0.000000030,0 >> at end: txn: "C" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=0.000000030,0 min=0,0 seq=30} lock=true stat=PENDING rts=0.000000030,0 wto=false max=0,0 isn=1 data: "k"/0.000000014,0 -> /BYTES/b diff --git a/pkg/storage/testdata/mvcc_histories/max_keys b/pkg/storage/testdata/mvcc_histories/max_keys index 3ce655dede5f..dbc019cf2cc9 100644 --- a/pkg/storage/testdata/mvcc_histories/max_keys +++ b/pkg/storage/testdata/mvcc_histories/max_keys @@ -112,13 +112,13 @@ with t=A ts=11,0 max=3 scan k=k end=o scan k=k end=o reverse=true ---- -scan: "k" -> /BYTES/b @0,0 -scan: "l" -> /BYTES/b @0,0 -scan: "m" -> /BYTES/b @0,0 +scan: "k" -> /BYTES/b @0.000000011,0 +scan: "l" -> /BYTES/b @0.000000011,0 +scan: "m" -> /BYTES/b @0.000000011,0 scan: resume span ["n","o") -scan: "n" -> /BYTES/b @0,0 -scan: "m" -> /BYTES/b @0,0 -scan: "l" -> /BYTES/b @0,0 +scan: "n" -> /BYTES/b @0.000000011,0 +scan: "m" -> /BYTES/b @0.000000011,0 +scan: "l" -> /BYTES/b @0.000000011,0 scan: resume span ["k","k\x00") >> at end: txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=0.000000011,0 min=0,0 seq=20} lock=true stat=PENDING rts=0.000000011,0 wto=false max=0,0 @@ -176,9 +176,9 @@ with t=B ts=12,0 max=3 txn_step seq=20 scan k=k end=o ---- -scan: "k" -> /BYTES/b @0,0 -scan: "l" -> /BYTES/b @0,0 -scan: "m" -> /BYTES/b @0,0 +scan: "k" -> /BYTES/b @0.000000012,0 +scan: "l" -> /BYTES/b @0.000000012,0 +scan: "m" -> /BYTES/b @0.000000012,0 scan: resume span ["n","o") >> at end: txn: "B" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=0.000000012,0 min=0,0 seq=20} lock=true stat=PENDING rts=0.000000012,0 wto=false max=0,0 diff --git a/pkg/storage/testdata/mvcc_histories/target_bytes b/pkg/storage/testdata/mvcc_histories/target_bytes index 71377fd432c0..0f593b2544ab 100644 --- a/pkg/storage/testdata/mvcc_histories/target_bytes +++ b/pkg/storage/testdata/mvcc_histories/target_bytes @@ -272,14 +272,14 @@ with t=A ts=11,0 targetbytes=32 scan k=k end=o scan k=k end=o reverse=true ---- -scan: "k" -> /BYTES/b @0,0 -scan: "l" -> /BYTES/b @0,0 +scan: "k" -> /BYTES/b @0.000000011,0 +scan: "l" -> /BYTES/b @0.000000011,0 scan: resume span ["m","o") -scan: 32 bytes (target 32) -scan: "n" -> /BYTES/b @0,0 -scan: "m" -> /BYTES/b @0,0 +scan: 50 bytes (target 32) +scan: "n" -> /BYTES/b @0.000000011,0 +scan: "m" -> /BYTES/b @0.000000011,0 scan: resume span ["k","l\x00") -scan: 32 bytes (target 32) +scan: 50 bytes (target 32) >> at end: txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=0.000000011,0 min=0,0 seq=20} lock=true stat=PENDING rts=0.000000011,0 wto=false max=0,0 data: "a"/0.000000123,45 -> /BYTES/abcdef