Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
49839: roachtest: Pin pgx to 4.6.0 r=apantel a=apantel

Release note: None

50255: ui,sql: add Vectorized info to statements page r=yuzefovich a=yuzefovich

This commit adds a box to Statements page that displays whether the
vectorized execution engine is used.

Release note (admin ui change): Statements page now contains information
about which execution engine is used (vectorized or not).


50286: sql: make error message for invalid id type reference better r=ajwerner a=rohany

Fixes #50276.

Release note (bug fix): Fix an internal error from being thrown when
referencing a type that does not exist by ID.

50402: kvserver: add a timeout to getChecksum to fix a flaky test r=lunevalex a=lunevalex

Fixes #48251

This change fixes an issue introduced in #49763. When a replica is being restored from a snapshot it does not compute checksums. This leads to a timeout on the whole check, resulting in a failure. A beter approach is to timeout the specific getChecksum request on a replica if we think it's not going to respond. The getChecksum() method will now wait a fraction of the whole deadline to see if a computeChecksum request is being computed and if one has not started in that time it will fail fast, instead of waiting the whole RPC deadline. We considered a number of alternative approaches, which either did not work or were unpalatable i.e. special casing LEARNER replicas in getChecksum, releasing getChecksum notify channel when we apply snapshots.

Release note : NONE

50411: gcjob: skip table if descriptor doesn't exist r=lucy-zhang a=lucy-zhang

Previously, the GC job would fail if a descriptor for a table to be
dropped did not exist. This can happen in cases where multiple GC jobs
are created for the same table, and one job deletes the table descriptor
first.

The existence of multiple GC jobs for the same table is itself
undesirable, but it's possible when some schema change job other than
the one created when dropping the table sees that the table is in the
dropped state and tries to drop the table anyway.

This PR mitigates the problem by skipping the table and marking it
internally as GC'ed if the descriptor doesn't exist.

Touches #50344.

Release note (bug fix): Fixes a bug affecting some `DROP DATABASE`
schema changes where multiple GC jobs are created, causing the GC job
for the database to fail. GC jobs will no longer fail upon failing to
find a table descriptor already deleted by a different GC job.

50502: sql: remove type annotations from computed cols in information_schema r=rafiss a=rohany

Fixes #50499.

No release note since this was introduced this release.

Release note: None

50547: tree: add a case for enums in AsJSON r=otan a=rohany

Fixes #50524.

This commit fixes an assertion failure by adding a case for enum types
in the `tree.AsJSON` function.

Release note: None

Co-authored-by: Adam Pantel <adam@cockroachlabs.com>
Co-authored-by: Yahor Yuzefovich <yahor@cockroachlabs.com>
Co-authored-by: Rohan Yadav <rohany@alumni.cmu.edu>
Co-authored-by: Alex Lunev <alexl@cockroachlabs.com>
Co-authored-by: Lucy Zhang <lucy-zhang@users.noreply.github.com>
  • Loading branch information
6 people committed Jun 23, 2020
8 parents c715be8 + d63c400 + c77d5f0 + a72dd5a + 20a382c + 8bcfe10 + bb4dd50 + 56f1983 commit 60ecb57
Show file tree
Hide file tree
Showing 27 changed files with 496 additions and 146 deletions.
4 changes: 3 additions & 1 deletion pkg/cmd/roachtest/pgx.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
)

var pgxReleaseTagRegex = regexp.MustCompile(`^v(?P<major>\d+)\.(?P<minor>\d+)\.(?P<point>\d+)$`)
var supportedTag = "v4.6.0"

// This test runs pgx's full test suite against a single cockroach node.

Expand Down Expand Up @@ -58,6 +59,7 @@ func registerPgx(r *testRegistry) {
t.Fatal(err)
}
c.l.Printf("Latest jackc/pgx release is %s.", latestTag)
c.l.Printf("Supported release is %s.", supportedTag)

t.Status("installing go-junit-report")
if err := repeatRunE(
Expand Down Expand Up @@ -109,7 +111,7 @@ func registerPgx(r *testRegistry) {
results := newORMTestsResults()
results.parseJUnitXML(t, expectedFailures, ignorelist, xmlResults)
results.summarizeAll(
t, "pgx", blocklistName, expectedFailures, version, latestTag,
t, "pgx", blocklistName, expectedFailures, version, supportedTag,
)
}

Expand Down
97 changes: 58 additions & 39 deletions pkg/cmd/roachtest/pgx_blocklist.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,45 +22,64 @@ var pgxBlocklists = blocklistsForVersion{
var pgxBlocklist20_2 = pgxBlocklist20_1

var pgxBlocklist20_1 = blocklist{
"v4.Example_CustomType": "27796",
"v4.TestConnBeginBatchDeferredError": "31632",
"v4.TestConnCopyFromFailServerSideMidway": "19603",
"v4.TestConnCopyFromJSON": "19603",
"v4.TestConnCopyFromLarge": "19603",
"v4.TestConnCopyFromSmall": "19603",
"v4.TestConnQueryDeferredError": "31632",
"v4.TestConnQueryErrorWhileReturningRows": "26925",
"v4.TestConnQueryReadRowMultipleTimes": "26925",
"v4.TestConnQueryValues": "26925",
"v4.TestConnQueryValuesWithUnknownOID": "26925",
"v4.TestConnSendBatch": "44712",
"v4.TestConnSendBatchWithPreparedStatement": "41558",
"v4.TestConnSimpleProtocol": "21286",
"v4.TestConnSimpleProtocolRefusesNonStandardConformingStrings": "36215",
"v4.TestConnSimpleProtocolRefusesNonUTF8ClientEncoding": "37129",
"v4.TestDomainType": "27796",
"v4.TestFatalRxError": "35897",
"v4.TestFatalTxError": "35897",
"v4.TestInetCIDRArrayTranscodeIP": "18846",
"v4.TestInetCIDRArrayTranscodeIPNet": "18846",
"v4.TestInetCIDRTranscodeIP": "18846",
"v4.TestInetCIDRTranscodeIPNet": "18846",
"v4.TestInetCIDRTranscodeWithJustIP": "18846",
"v4.TestLargeObjects": "26725",
"v4.TestLargeObjectsMultipleTransactions": "26725",
"v4.TestLargeObjectsPreferSimpleProtocol": "26725",
"v4.TestListenNotify": "41522",
"v4.TestListenNotifySelfNotification": "41522",
"v4.TestListenNotifyWhileBusyIsSafe": "41522",
"v4.TestQueryContextErrorWhileReceivingRows": "26925",
"v4.TestRowDecode": "26925",
"v4.TestTransactionSuccessfulCommit": "31632",
"v4.TestTransactionSuccessfulRollback": "31632",
"v4.TestTxCommitSerializationFailure": "12701",
"v4.TestTxCommitWhenTxBroken": "31632",
"v4.TestTxNestedTransactionCommit": "31632",
"v4.TestTxNestedTransactionRollback": "31632",
"v4.TestUnregisteredTypeUsableAsStringArgumentAndBaseResult": "27796",
"v4.Example_CustomType": "27796",
"v4.TestConnBeginBatchDeferredError": "31632",
"v4.TestConnCopyFromEnum": "unknown",
"v4.TestConnCopyFromJSON": "19603",
"v4.TestConnCopyFromLarge": "19603",
"v4.TestConnCopyFromSmall": "19603",
"v4.TestConnQueryDeferredError": "31632",
"v4.TestConnQueryErrorWhileReturningRows": "26925",
"v4.TestConnQueryReadRowMultipleTimes": "26925",
"v4.TestConnQueryValues": "26925",
"v4.TestConnQueryValuesWithUnknownOID": "26925",
"v4.TestConnSendBatch": "44712",
"v4.TestConnSendBatchWithPreparedStatement": "41558",
"v4.TestConnSimpleProtocol": "21286",
"v4.TestConnSimpleProtocolRefusesNonStandardConformingStrings": "36215",
"v4.TestConnSimpleProtocolRefusesNonUTF8ClientEncoding": "37129",
"v4.TestDomainType": "27796",
"v4.TestDomainType/DefaultProto": "unknown",
"v4.TestDomainType/SimpleProto": "unknown",
"v4.TestFatalRxError": "35897",
"v4.TestFatalTxError": "35897",
"v4.TestInetCIDRArrayTranscodeIP": "18846",
"v4.TestInetCIDRArrayTranscodeIP/DefaultProto": "unknown",
"v4.TestInetCIDRArrayTranscodeIP/SimpleProto": "unknown",
"v4.TestInetCIDRArrayTranscodeIPNet": "18846",
"v4.TestInetCIDRArrayTranscodeIPNet/DefaultProto": "unknown",
"v4.TestInetCIDRArrayTranscodeIPNet/SimpleProto": "unknown",
"v4.TestInetCIDRTranscodeIP": "18846",
"v4.TestInetCIDRTranscodeIP/DefaultProto": "unknown",
"v4.TestInetCIDRTranscodeIP/SimpleProto": "unknown",
"v4.TestInetCIDRTranscodeIPNet": "18846",
"v4.TestInetCIDRTranscodeIPNet/DefaultProto": "unknown",
"v4.TestInetCIDRTranscodeIPNet/SimpleProto": "unknown",
"v4.TestInetCIDRTranscodeWithJustIP": "18846",
"v4.TestInetCIDRTranscodeWithJustIP/DefaultProto": "unknown",
"v4.TestInetCIDRTranscodeWithJustIP/SimpleProto": "unknown",
"v4.TestInsertBoolArray": "unknown",
"v4.TestInsertBoolArray/SimpleProto": "unknown",
"v4.TestInsertTimestampArray": "unknown",
"v4.TestInsertTimestampArray/SimpleProto": "unknown",
"v4.TestLargeObjects": "26725",
"v4.TestLargeObjectsMultipleTransactions": "26725",
"v4.TestLargeObjectsPreferSimpleProtocol": "26725",
"v4.TestListenNotify": "41522",
"v4.TestListenNotifySelfNotification": "41522",
"v4.TestListenNotifyWhileBusyIsSafe": "41522",
"v4.TestQueryContextErrorWhileReceivingRows": "26925",
"v4.TestRowDecodeBinary": "unknown",
"v4.TestSendBatchSimpleProtocol": "unknown",
"v4.TestTransactionSuccessfulCommit": "31632",
"v4.TestTransactionSuccessfulRollback": "31632",
"v4.TestTxCommitSerializationFailure": "12701",
"v4.TestTxCommitWhenTxBroken": "31632",
"v4.TestTxNestedTransactionCommit": "31632",
"v4.TestTxNestedTransactionRollback": "31632",
"v4.TestUnregisteredTypeUsableAsStringArgumentAndBaseResult": "27796",
"v4.TestUnregisteredTypeUsableAsStringArgumentAndBaseResult/DefaultProto": "unknown",
"v4.TestUnregisteredTypeUsableAsStringArgumentAndBaseResult/SimpleProto": "unknown",
}

var pgxIgnorelist20_2 = pgxIgnorelist20_1
Expand Down
1 change: 0 additions & 1 deletion pkg/kv/kvserver/consistency_queue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,6 @@ func TestCheckConsistencyInconsistent(t *testing.T) {
// The upreplication here is immaterial and serves only to add realism to the test.
func TestConsistencyQueueRecomputeStats(t *testing.T) {
defer leaktest.AfterTest(t)()
t.Skip("Skip until #48251 is fixed")
testutils.RunTrueAndFalse(t, "hadEstimates", testConsistencyQueueRecomputeStatsImpl)
}

Expand Down
78 changes: 69 additions & 9 deletions pkg/kv/kvserver/replica_consistency.go
Original file line number Diff line number Diff line change
Expand Up @@ -423,16 +423,21 @@ func (r *Replica) getChecksum(ctx context.Context, id uuid.UUID) (ReplicaChecksu
r.mu.checksums[id] = c
}
r.mu.Unlock()
// Wait
select {
case <-r.store.Stopper().ShouldQuiesce():
return ReplicaChecksum{},
errors.Errorf("store quiescing while waiting for compute checksum (ID = %s)", id)
case <-ctx.Done():
return ReplicaChecksum{},
errors.Wrapf(ctx.Err(), "while waiting for compute checksum (ID = %s)", id)
case <-c.notify:

// Wait for the checksum to compute or at least to start.
computed, err := r.checksumInitialWait(ctx, id, c.notify)
if err != nil {
return ReplicaChecksum{}, err
}
// If the checksum started, but has not completed commit
// to waiting the full deadline.
if !computed {
_, err = r.checksumWait(ctx, id, c.notify, nil)
if err != nil {
return ReplicaChecksum{}, err
}
}

if log.V(1) {
log.Infof(ctx, "waited for compute checksum for %s", timeutil.Since(now))
}
Expand All @@ -448,6 +453,61 @@ func (r *Replica) getChecksum(ctx context.Context, id uuid.UUID) (ReplicaChecksu
return c, nil
}

// Waits for the checksum to be available or for the checksum to start computing.
// If we waited for 10% of the deadline and it has not started, then it's
// unlikely to start because this replica is most likely being restored from
// snapshots.
func (r *Replica) checksumInitialWait(
ctx context.Context, id uuid.UUID, notify chan struct{},
) (bool, error) {
d, dOk := ctx.Deadline()
// The max wait time should be 5 seconds, so we dont end up waiting for
// minutes for a huge range.
maxInitialWait := 5 * time.Second
var initialWait <-chan time.Time
if dOk {
duration := time.Duration(timeutil.Until(d).Nanoseconds() / 10)
if duration > maxInitialWait {
duration = maxInitialWait
}
initialWait = time.After(duration)
} else {
initialWait = time.After(maxInitialWait)
}
return r.checksumWait(ctx, id, notify, initialWait)
}

// checksumWait waits for the checksum to be available or for the computation
// to start within the initialWait time. The bool return flag is used to
// indicate if a checksum is available (true) or if the initial wait has expired
// and the caller should wait more, since the checksum computation started.
func (r *Replica) checksumWait(
ctx context.Context, id uuid.UUID, notify chan struct{}, initialWait <-chan time.Time,
) (bool, error) {
// Wait
select {
case <-r.store.Stopper().ShouldQuiesce():
return false,
errors.Errorf("store quiescing while waiting for compute checksum (ID = %s)", id)
case <-ctx.Done():
return false,
errors.Wrapf(ctx.Err(), "while waiting for compute checksum (ID = %s)", id)
case <-initialWait:
{
r.mu.Lock()
started := r.mu.checksums[id].started
r.mu.Unlock()
if !started {
return false,
errors.Errorf("checksum computation did not start in time for (ID = %s)", id)
}
return false, nil
}
case <-notify:
return true, nil
}
}

// computeChecksumDone adds the computed checksum, sets a deadline for GCing the
// checksum, and sends out a notification.
func (r *Replica) computeChecksumDone(
Expand Down
60 changes: 60 additions & 0 deletions pkg/kv/kvserver/replica_consistency_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ package kvserver
import (
"context"
"testing"
"time"

"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
Expand Down Expand Up @@ -56,3 +57,62 @@ func TestReplicaChecksumVersion(t *testing.T) {
}
})
}

func TestGetChecksumNotSuccessfulExitConditions(t *testing.T) {
defer leaktest.AfterTest(t)()

ctx, cancel := context.WithTimeout(context.Background(), 1000*time.Millisecond)
defer cancel()

tc := testContext{}
stopper := stop.NewStopper()
defer stopper.Stop(ctx)
tc.Start(t, stopper)

id := uuid.FastMakeV4()
notify := make(chan struct{})
close(notify)

// Simple condition, the checksum is notified, but not computed.
tc.repl.mu.Lock()
tc.repl.mu.checksums[id] = ReplicaChecksum{notify: notify}
tc.repl.mu.Unlock()
rc, err := tc.repl.getChecksum(ctx, id)
if !testutils.IsError(err, "no checksum found") {
t.Fatal(err)
}
require.Nil(t, rc.Checksum)
// Next condition, the initial wait expires and checksum is not started,
// this will take 10ms.
id = uuid.FastMakeV4()
tc.repl.mu.Lock()
tc.repl.mu.checksums[id] = ReplicaChecksum{notify: make(chan struct{})}
tc.repl.mu.Unlock()
rc, err = tc.repl.getChecksum(ctx, id)
if !testutils.IsError(err, "checksum computation did not start") {
t.Fatal(err)
}
require.Nil(t, rc.Checksum)
// Next condition, initial wait expired and we found the started flag,
// so next step is for context deadline.
id = uuid.FastMakeV4()
tc.repl.mu.Lock()
tc.repl.mu.checksums[id] = ReplicaChecksum{notify: make(chan struct{}), started: true}
tc.repl.mu.Unlock()
rc, err = tc.repl.getChecksum(ctx, id)
if !testutils.IsError(err, "context deadline exceeded") {
t.Fatal(err)
}
require.Nil(t, rc.Checksum)

// Need to reset the context, since we deadlined it above.
ctx, cancel = context.WithTimeout(context.Background(), 100*time.Millisecond)
defer cancel()
// Next condition, node should quiesce.
tc.repl.store.Stopper().Quiesce(ctx)
rc, err = tc.repl.getChecksum(ctx, uuid.FastMakeV4())
if !testutils.IsError(err, "store quiescing") {
t.Fatal(err)
}
require.Nil(t, rc.Checksum)
}
Loading

0 comments on commit 60ecb57

Please sign in to comment.