Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sql: v20.1.0: read table descriptor without ModificationTime with zero MVCC timestamp #49266

Closed
cockroach-teamcity opened this issue May 19, 2020 · 17 comments · Fixed by #52358
Closed
Assignees
Labels
C-bug Code not up to spec/doc, specs & docs deemed correct. Solution expected to change code/behavior. O-sentry Originated from an in-the-wild panic report.

Comments

@cockroach-teamcity
Copy link
Member

This issue was autofiled by Sentry. It represents a crash or reported error on a live cluster with telemetry enabled.

Sentry link: https://sentry.io/organizations/cockroach-labs/issues/1675228652/?referrer=webhooks_plugin

Panic message:

structured.go:3478: read table descriptor for %q (%d.%d) without ModificationTime with zero MVCC timestamp | string; sqlbase.ID; sqlbase.ID

Stacktrace (expand for inline code snippets):

if t != nil {
t.maybeSetTimeFromMVCCTimestamp(ts)
}
in pkg/sql/sqlbase.(*Descriptor).Table
}
table := desc.Table(ts)
if table == nil {
in pkg/sql/sqlbase.getTableDescFromIDRaw
if _, ok := otherUnupgradedTables[ref.Table]; !ok {
tbl, err := getTableDescFromIDRaw(ctx, protoGetter, ref.Table)
if err != nil {
in pkg/sql/sqlbase.maybeUpgradeForeignKeyRepOnIndex
for i := range desc.Indexes {
newChanged, err := maybeUpgradeForeignKeyRepOnIndex(
ctx, protoGetter, otherUnupgradedTables, desc, &desc.Indexes[i], skipFKsWithNoMatchingTable,
in pkg/sql/sqlbase.(*TableDescriptor).MaybeUpgradeForeignKeyRepresentation
if protoGetter != nil {
if _, err := desc.MaybeUpgradeForeignKeyRepresentation(ctx, protoGetter, false /* skipFKsWithNoMatchingTable*/); err != nil {
return err
in pkg/sql/sqlbase.(*TableDescriptor).MaybeFillInDescriptor
if err := table.MaybeFillInDescriptor(ctx, protoGetter); err != nil {
return nil, err
in pkg/sql/sqlbase.GetTableDescFromID
) (*MutableTableDescriptor, error) {
table, err := GetTableDescFromID(ctx, protoGetter, id)
if err != nil {
in pkg/sql/sqlbase.GetMutableTableDescFromID

cockroach/pkg/sql/table.go

Lines 480 to 482 in 9d456b9

}
return sqlbase.GetMutableTableDescFromID(ctx, txn, tableID)
}
in pkg/sql.(*TableCollection).getMutableTableVersionByID
} else {
lookup, err := p.Tables().getMutableTableVersionByID(ctx, ref.ReferencedTableID, p.txn)
if err != nil {
in pkg/sql.(*planner).removeFKBackReference
ref := &tableDesc.OutboundFKs[i]
if err := p.removeFKBackReference(ctx, tableDesc, ref); err != nil {
return droppedViews, err
in pkg/sql.(*planner).dropTableImpl
// TODO(knz): dependent dropped table names should be qualified here.
cascadedObjects, err = p.dropTableImpl(ctx, desc, false /* queueJob */, "")
}
in pkg/sql.(*dropDatabaseNode).startExec

cockroach/pkg/sql/plan.go

Lines 394 to 396 in 9d456b9

}
return n.startExec(params)
},
in pkg/sql.startExec.func2

cockroach/pkg/sql/walk.go

Lines 143 to 145 in 9d456b9

}
v.err = v.observer.leaveNode(name, plan)
}()
in pkg/sql.(*planVisitor).visitInternal.func1

cockroach/pkg/sql/walk.go

Lines 720 to 722 in 9d456b9

}
}
in pkg/sql.(*planVisitor).visitInternal

cockroach/pkg/sql/walk.go

Lines 110 to 112 in 9d456b9

}
v.visitInternal(plan, name)
return plan
in pkg/sql.(*planVisitor).visit
v := makePlanVisitor(ctx, observer)
v.visit(plan)
return v.err
in pkg/sql.walkPlan

cockroach/pkg/sql/plan.go

Lines 397 to 399 in 9d456b9

}
return walkPlan(params.ctx, plan, o)
}
in pkg/sql.startExec
// This starts all of the nodes below this node.
if err := startExec(p.params, p.node); err != nil {
p.MoveToDraining(err)
in pkg/sql.(*planNodeToRowSource).Start
}
ctx = pb.self.Start(ctx)
Run(ctx, pb.self, pb.Out.output)
in pkg/sql/execinfra.(*ProcessorBase).Run
}
headProc.Run(ctx)
return nil
in pkg/sql/flowinfra.(*FlowBase).Run
// TODO(radu): this should go through the flow scheduler.
if err := flow.Run(ctx, func() {}); err != nil {
log.Fatalf(ctx, "unexpected error from syncFlow.Start(): %s "+
in pkg/sql.(*DistSQLPlanner).Run
recv.expectedRowsRead = int64(physPlan.TotalEstimatedScannedRows)
return dsp.Run(planCtx, txn, &physPlan, recv, evalCtx, nil /* finishedSetupFn */)
}
in pkg/sql.(*DistSQLPlanner).PlanAndRun
// the planner whether or not to plan remote table readers.
cleanup := ex.server.cfg.DistSQLPlanner.PlanAndRun(
ctx, evalCtx, planCtx, planner.txn, planner.curPlan.plan, recv,
in pkg/sql.(*connExecutor).execWithDistSQLEngine
ex.sessionTracing.TraceExecStart(ctx, "distributed")
bytesRead, rowsRead, err := ex.execWithDistSQLEngine(ctx, planner, stmt.AST.StatementType(), res, distributePlan, progAtomic)
ex.sessionTracing.TraceExecEnd(ctx, res.Err(), res.RowsAffected())
in pkg/sql.(*connExecutor).dispatchToExecutionEngine
p.autoCommit = os.ImplicitTxn.Get() && !ex.server.cfg.TestingKnobs.DisableAutoCommit
if err := ex.dispatchToExecutionEngine(ctx, p, res); err != nil {
return nil, nil, err
in pkg/sql.(*connExecutor).execStmtInOpenState
} else {
ev, payload, err = ex.execStmtInOpenState(ctx, stmt, res, pinfo)
}
in pkg/sql.(*connExecutor).execStmt
stmtCtx := withStatement(ctx, ex.curStmt)
ev, payload, err = ex.execStmt(stmtCtx, curStmt, stmtRes, nil /* pinfo */)
if err != nil {
in pkg/sql.(*connExecutor).execCmd
var err error
if err = ex.execCmd(ex.Ctx()); err != nil {
if err == io.EOF || err == errDrainingComplete {
in pkg/sql.(*connExecutor).run
}()
return h.ex.run(ctx, s.pool, reserved, cancel)
}
in pkg/sql.(*Server).ServeConn
reservedOwned = false // We're about to pass ownership away.
retErr = sqlServer.ServeConn(ctx, connHandler, reserved, cancelConn)
}()
in pkg/sql/pgwire.(*conn).processCommandsAsync.func1

pkg/sql/sqlbase/structured.go in pkg/sql/sqlbase.(*Descriptor).Table at line 3478
pkg/sql/sqlbase/structured.go in pkg/sql/sqlbase.getTableDescFromIDRaw at line 408
pkg/sql/sqlbase/structured.go in pkg/sql/sqlbase.maybeUpgradeForeignKeyRepOnIndex at line 989
pkg/sql/sqlbase/structured.go in pkg/sql/sqlbase.(*TableDescriptor).MaybeUpgradeForeignKeyRepresentation at line 956
pkg/sql/sqlbase/structured.go in pkg/sql/sqlbase.(*TableDescriptor).MaybeFillInDescriptor at line 893
pkg/sql/sqlbase/structured.go in pkg/sql/sqlbase.GetTableDescFromID at line 366
pkg/sql/sqlbase/structured.go in pkg/sql/sqlbase.GetMutableTableDescFromID at line 422
pkg/sql/table.go in pkg/sql.(*TableCollection).getMutableTableVersionByID at line 481
pkg/sql/drop_table.go in pkg/sql.(*planner).removeFKBackReference at line 490
pkg/sql/drop_table.go in pkg/sql.(*planner).dropTableImpl at line 252
pkg/sql/drop_database.go in pkg/sql.(*dropDatabaseNode).startExec at line 197
pkg/sql/plan.go in pkg/sql.startExec.func2 at line 395
pkg/sql/walk.go in pkg/sql.(*planVisitor).visitInternal.func1 at line 144
pkg/sql/walk.go in pkg/sql.(*planVisitor).visitInternal at line 721
pkg/sql/walk.go in pkg/sql.(*planVisitor).visit at line 111
pkg/sql/walk.go in pkg/sql.walkPlan at line 75
pkg/sql/plan.go in pkg/sql.startExec at line 398
pkg/sql/plan_node_to_row_source.go in pkg/sql.(*planNodeToRowSource).Start at line 117
pkg/sql/execinfra/processorsbase.go in pkg/sql/execinfra.(*ProcessorBase).Run at line 748
pkg/sql/flowinfra/flow.go in pkg/sql/flowinfra.(*FlowBase).Run at line 370
pkg/sql/distsql_running.go in pkg/sql.(*DistSQLPlanner).Run at line 405
pkg/sql/distsql_running.go in pkg/sql.(*DistSQLPlanner).PlanAndRun at line 1019
pkg/sql/conn_executor_exec.go in pkg/sql.(*connExecutor).execWithDistSQLEngine at line 883
pkg/sql/conn_executor_exec.go in pkg/sql.(*connExecutor).dispatchToExecutionEngine at line 776
pkg/sql/conn_executor_exec.go in pkg/sql.(*connExecutor).execStmtInOpenState at line 481
pkg/sql/conn_executor_exec.go in pkg/sql.(*connExecutor).execStmt at line 96
pkg/sql/conn_executor.go in pkg/sql.(*connExecutor).execCmd at line 1368
pkg/sql/conn_executor.go in pkg/sql.(*connExecutor).run at line 1297
pkg/sql/conn_executor.go in pkg/sql.(*Server).ServeConn at line 476
pkg/sql/pgwire/conn.go in pkg/sql/pgwire.(*conn).processCommandsAsync.func1 at line 590
Tag Value
Cockroach Release v20.1.0
Cockroach SHA: 9d456b9
Platform darwin amd64
Distribution CCL
Environment v20.1.0
Command server
Go Version go1.13.9
# of CPUs 8
# of Goroutines 271
@cockroach-teamcity cockroach-teamcity added C-bug Code not up to spec/doc, specs & docs deemed correct. Solution expected to change code/behavior. O-sentry Originated from an in-the-wild panic report. labels May 19, 2020
@yuzefovich yuzefovich changed the title sentry: structured.go:3478: read table descriptor for %q (%d.%d) without ModificationTime with zero MVCC timestamp | string; sqlbase.ID; sqlbase.ID sql: v20.1.0: read table descriptor without ModificationTime with zero MVCC timestamp May 19, 2020
@yuzefovich
Copy link
Member

cc @lucy-zhang

@thoszhang
Copy link
Contributor

@ajwerner do you know how this might be possible? This is while fetching a referenced table while migrating the FK representation for a table, though I don't see how that would really matter.

@ajwerner
Copy link
Contributor

I have literally no clue how this could happen. It only seems possible if the returned KV pair returned a zero-value hlc timestamp. This code path always uses a transaction.

In effect, it seems we called GetProtoTs and got a zero-valued hlc.Timestamp and a descriptor that had a non-nil Table in its union. There's something fishy here.

@ajwerner
Copy link
Contributor

Also worth noting that this is a mac. We should probably log.Safe some of these arguments.

One other thing of note is that this is the same cluster that hit #48786 - it'd be interesting to know what is going on in terms of the state of that cluster.

@ajwerner
Copy link
Contributor

@daylerees @davidsbond @rcrowe any chance you observed a panic around this time? I'd love to understand what happened here as it certainly violates my expectations.

@ajwerner
Copy link
Contributor

Also surprising to me is that these seem to be the same clusters but the original issue was using a linux binary and this one seems to be using a macOS binary.

@davidsbond
Copy link

Hi all, our database cluster that was affected by this is using the standard docker image distribution, not 100% sure how we'd end up with a macOS binary. I'm just checking our DB logs to see if it is our cluster

@davidsbond
Copy link

Grepping through the logs of the cluster that was affected is returning no results. We also restored that database from backup as was suggested

@davidsbond
Copy link

Is it possible that while we had the same error the sentry report came from a different cluster to our own?

@thoszhang
Copy link
Contributor

We seem to finally have a lead on this bug. The problem is that getTableDescFromIDRaw, which is called for every table with a FK relationship to the original table in MaybeUpgradeForeignKeyRepresentation, always reads table descriptors from the store. If the other table descriptors have already been modified in the same transaction, we'll read the intent, and the timestamp returned from GetProtoTs will be zero, violating the assumptions of MaybeSetModificationTimeFromMVCCTimestamp.

A potential similar problem occurs with TableDescriptor.Validate(), which needs to fetch other table descriptors to validate cross-references.

The current thinking is that we should stop reading these descriptors directly and instead always go through a desc.Collection. Details still need to be worked out.

@ajwerner
Copy link
Contributor

In the short term, the fix which we can backport is to not enforce this invariant for tables retrieved just for the purpose of this upgrade.

@ajwerner
Copy link
Contributor

We seem to finally have a lead on this bug. The problem is that getTableDescFromIDRaw, which is called for every table with a FK relationship to the original table in MaybeUpgradeForeignKeyRepresentation, always reads table descriptors from the store. If the other table descriptors have already been modified in the same transaction, we'll read the intent, and the timestamp returned from GetProtoTs will be zero, violating the assumptions of MaybeSetModificationTimeFromMVCCTimestamp.

Something occurred to me that this came up at some point, esp in reference to the changes to expose MVCC timestamps to sql. While testing the proposed fix I figured it might be worth ensuring that the concerning behavior actually happens. I wrote this test and it does not find this zero-timestamp behavior. I ran it both on master and release-20.1. I think we still need to understand where the zero comes from.

// TestGetIntentTimestamp tests that a read against an intent returns the value
// with a non-zero timestamp.
func TestGetIntentTimestamp(t *testing.T) {
	defer leaktest.AfterTest(t)()
	s, _, db := serverutils.StartServer(t, base.TestServerArgs{Insecure: true})
	defer s.Stopper().Stop(context.Background())
	ctx := context.Background()

	require.NoError(t, db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		const key = "aa"
		// Using ptpb.Record as a simple proto message.
		ptRec := ptpb.Record{ID: uuid.MakeV4()}
		if err := txn.Put(context.Background(), key, &ptRec); err != nil {
			t.Fatal(err)
		}
		// Let's ensure that when we read our intent, we read it at a non-zero
		// timestamp.
		var read ptpb.Record
		ts, err := txn.GetProtoTs(ctx, key, &read)
		if err != nil {
			return err
		}
		require.NotZero(t, ts)
		require.EqualValues(t, ptRec, read)
		return nil
	}))
}
$ go test ./pkg/kv/ --run TestGetIntentTimestamp
ok  	github.com/cockroachdb/cockroach/pkg/kv	0.093s

It's worth noting that this behavior also is not good. It means we'll populate descriptors with ModificationTime as the provisional commit timestamp rather than the real commit timestamp. In practice this is probably fine because we're not ever handing these descriptors out for leases.

@thoszhang
Copy link
Contributor

Here's a contrived way to reproduce a zero timestamp, which still seems not too dissimilar to the MaybeUpgradeForeignKeyRepresentation case. One interesting thing is that this doesn't always reproduce the panic; it usually does, but sometimes it succeeds without an error. I couldn't repro a success with -vmodule=replica_evaluate=2, though.

Replace the call to ValidateTable() with Validate() in

if err := table.ValidateTable(); err != nil {

to start reading referenced descriptors in GetMutableDescriptorByID. Then the following usually (but not always) causes a panic due to reading a zero timestamp:

create table t1 (a int primary key);
create table t2 (b int primary key);
create table t3 (a int references t1, b int references t2);
truncate t1 cascade;

Here's the relevant part of the stack trace:

github.com/cockroachdb/cockroach/pkg/util/log.getStacks(0xa9bf800, 0x7cac095, 0x11, 0xc000ff3e20)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/util/log/get_stacks.go:25 +0xb8
github.com/cockroachdb/cockroach/pkg/util/log.(*loggerT).outputLogEntry(0xa9bee60, 0x4, 0x162423d5f5731b70, 0x602, 0xa5bff26, 0x19, 0xfaf, 0xc00077ba40, 0x55, 0x0, ...)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/util/log/clog.go:246 +0xa7e
github.com/cockroachdb/cockroach/pkg/util/log.addStructured(0x86e9c00, 0xc000074118, 0xc000000004, 0x2, 0x7d6fa4e, 0x53, 0xc000ff4340, 0x2, 0x2)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/util/log/structured.go:52 +0x1ac
github.com/cockroachdb/cockroach/pkg/util/log.logDepth(0x86e9c00, 0xc000074118, 0x1, 0x4, 0x7d6fa4e, 0x53, 0xc000ff4340, 0x2, 0x2)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/util/log/log.go:44 +0x8c
github.com/cockroachdb/cockroach/pkg/util/log.Fatalf(...)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/util/log/log.go:153
github.com/cockroachdb/cockroach/pkg/sql/sqlbase.(*Descriptor).MaybeSetModificationTimeFromMVCCTimestamp(0xc000479140, 0x86e9c00, 0xc000074118, 0x0, 0x0)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/sql/sqlbase/structured.go:4015 +0x666
github.com/cockroachdb/cockroach/pkg/sql/sqlbase.(*Descriptor).Table(0xc000479140, 0x0, 0xc000000000, 0x7a2d820)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/sql/sqlbase/structured.go:4034 +0x8f
github.com/cockroachdb/cockroach/pkg/sql/sqlbase.getTableDescFromIDRaw(0x86e9bc0, 0xc00086c400, 0x866a6e0, 0xc0014e0ab0, 0xc000c957e0, 0xc000c957e0, 0x0, 0x37, 0x10, 0xc000ff4588, ...)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/sql/sqlbase/structured.go:453 +0x103
github.com/cockroachdb/cockroach/pkg/sql/sqlbase.GetTableDescFromID(0x86e9bc0, 0xc00086c400, 0x866a6e0, 0xc0014e0ab0, 0xc000c957e0, 0xc000c957e0, 0x0, 0x37, 0x1, 0x0, ...)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/sql/sqlbase/structured.go:406 +0x87
github.com/cockroachdb/cockroach/pkg/sql/sqlbase.(*TableDescriptor).validateCrossReferences.func1(0x37, 0xc000ff4948, 0xc000000036, 0xc000ff4b80)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/sql/sqlbase/structured.go:1791 +0x17f
github.com/cockroachdb/cockroach/pkg/sql/sqlbase.(*TableDescriptor).validateCrossReferences(0xc001524000, 0x86e9bc0, 0xc00086c400, 0xc0014e0ab0, 0xc000c957e0, 0xc000c957e0, 0x0, 0x0, 0x0)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/sql/sqlbase/structured.go:1836 +0x885
github.com/cockroachdb/cockroach/pkg/sql/sqlbase.(*TableDescriptor).Validate(0xc001524000, 0x86e9bc0, 0xc00086c400, 0xc0014e0ab0, 0xc000c957e0, 0xc000c957e0, 0x0, 0x0, 0x0)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/sql/sqlbase/structured.go:1765 +0xb0
github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkv.unwrapDescriptorMutable(0x86e9bc0, 0xc00086c400, 0xc0014e0ab0, 0xc000c957e0, 0xc000c957e0, 0x0, 0x162423d5f104c8b8, 0x162423d500000000, 0xc0004790a0, 0x0, ...)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkv/catalogkv.go:163 +0x41a
github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkv.GetMutableDescriptorByID(0x86e9bc0, 0xc00086c400, 0xc0014e0ab0, 0xc000c957e0, 0xc000c957e0, 0x0, 0x36, 0xc000ff5488, 0x1, 0x1, ...)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkv/catalogkv.go:147 +0x236
github.com/cockroachdb/cockroach/pkg/sql/catalog/descs.(*Collection).getMutableDescriptorByID(0xc001542228, 0x86e9bc0, 0xc00086c400, 0x36, 0xc0014e0ab0, 0x15, 0xc000ff55a0, 0x1, 0x1)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/sql/catalog/descs/collection.go:521 +0x14f
github.com/cockroachdb/cockroach/pkg/sql/catalog/descs.(*Collection).GetMutableTableVersionByID(0xc001542228, 0x86e9bc0, 0xc00086c400, 0xc000000036, 0xc0014e0ab0, 0x15, 0xc000ff55a0, 0x1)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/sql/catalog/descs/collection.go:505 +0x55
github.com/cockroachdb/cockroach/pkg/sql.(*planner).findAllReferences(0xc0015423d8, 0x86e9bc0, 0xc00086c400, 0xc000770c76, 0x2, 0x300000037, 0x0, 0x0, 0x0, 0x0, ...)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/sql/truncate.go:308 +0x26f
github.com/cockroachdb/cockroach/pkg/sql.(*planner).truncateTable(0xc0015423d8, 0x86e9bc0, 0xc00086c400, 0xc000000037, 0xc000f82780, 0x25, 0x100000000, 0x793b8e0, 0xc001310120)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/sql/truncate.go:214 +0x4fd
github.com/cockroachdb/cockroach/pkg/sql.(*truncateNode).startExec(0xc000010e90, 0x86e9bc0, 0xc00086c400, 0xc000c53b80, 0xc0015423d8, 0x6767800, 0xc0014c8100)
	/Users/lucy/go/src/github.com/cockroachdb/cockroach/pkg/sql/truncate.go:131 +0x981

Starting off with 53 -> t1, 54 -> t2, 55 -> t3, the sequence of writes is:

  • put table 53 (t1) in the drop state
  • update references on table 55 (t3) to point to 56 (the new t1)
  • put table 55 (t1) in the drop state

Then we start trying to determine the dependencies of 55 (t3) so we can determine which ones to also truncate, in planner.findAllReferences. That reads table 54 via GetMutableDescriptorByID, which then calls Validate (with our patch), which fetches table 55 again, which causes the panic.

During TRUNCATE, we don't delete any of these descriptors right away (and, obviously, if they were deleted then we wouldn't get values for them). I don't have any insights into what's going on here.

@ajwerner
Copy link
Contributor

ajwerner commented Aug 4, 2020

Alright, the problem is pretty clear now, when reading an intent we don't get a timestamp. I'll write up a patch to fix it. The problem ends up being that we are reading a key we've already written to so avoiding doing the re-lookup would save us but nevertheless, let's just fix the bug.

@thoszhang
Copy link
Contributor

Wait, can you clarify what the the difference is between what you just described and the non-repro from earlier?

@ajwerner
Copy link
Contributor

ajwerner commented Aug 4, 2020

Give me a second to post a PR. It has to do with sequence numbers and intent history.

ajwerner added a commit to ajwerner/cockroach that referenced this issue Aug 4, 2020
Before this commit, we'd return a zero-value MVCC timestamp when reading an
intent from the intent history. This was problematic because elsewhere in the
code we assume that we will always get a non-zero MVCC timestamp when a read
returns a value. This is especially bizarre given that a read of the latest
intent will return its write timestamp.

The semantics here are such that we'll return the timestamp of the MVCC
metadata for the row. I think this is the most reasonable thing to do as
that timestamp ought to reflect the timestamp we return when we return the
current intent and furthermore is the only timestamp we really have around.
We could return the transactions current read or write timestamp but those
both seem like worse choices.

It's also worth noting that in the case where we need to read a non-zero
value, we don't really care what that value is and the fact that we are
reading this intent itself is somewhat suspect. That being said, I still
think we should make this change in addition to any change made to prevent
the code referenced in cockroachdb#49266 from needing it.

Fixes cockroachdb#49266.
Informs cockroachdb#50102.

Release note: None
@ajwerner
Copy link
Contributor

ajwerner commented Aug 4, 2020

See #52358.

ajwerner added a commit to ajwerner/cockroach that referenced this issue Aug 5, 2020
Before this commit, we'd return a zero-value MVCC timestamp when reading an
intent from the intent history. This was problematic because elsewhere in the
code we assume that we will always get a non-zero MVCC timestamp when a read
returns a value. This is especially bizarre given that a read of the latest
intent will return its write timestamp.

The semantics here are such that we'll return the timestamp of the MVCC
metadata for the row. I think this is the most reasonable thing to do as
that timestamp ought to reflect the timestamp we return when we return the
current intent and furthermore is the only timestamp we really have around.
We could return the transactions current read or write timestamp but those
both seem like worse choices.

It's also worth noting that in the case where we need to read a non-zero
value, we don't really care what that value is and the fact that we are
reading this intent itself is somewhat suspect. That being said, I still
think we should make this change in addition to any change made to prevent
the code referenced in cockroachdb#49266 from needing it.

Fixes cockroachdb#49266.
Informs cockroachdb#50102.

Release note: None
craig bot pushed a commit that referenced this issue Aug 5, 2020
48842: sql: fix portals after exhausting rows r=yuzefovich a=yuzefovich

Previously, we would erroneously restart the execution from the very
beginning of empty, unclosed portals after they have been fully
exhausted when we should be returning no rows or an error in such
scenarios. This is now fixed by tracking whether a portal is exhausted
or not and intercepting the calls to `execStmt` when the conn executor
state machine is in an open state.

Note that the current solution has known deviations from Postgres:
- when attempting to execute portals of statements that don't return row
sets, on the second and consequent attempt PG returns an error while we
are silently doing nothing (meaning we do not run the statement at all
and return 0 rows)
- we incorrectly populate "command tag" field of pgwire messages of some
rows-returning statements after the portal suspension (for example,
a suspended UPDATE RETURNING in PG will return the total count of "rows
affected" while we will return the count since the last suspension).

These deviations are deemed acceptable since this commit fixes a much
worse problem - re-executing an exhausted portal (which could be
a mutation meaning, previously, we could have executed a mutation
multiple times).

The reasons for why this commit does not address these deviations are:
- Postgres has a concept of "portal strategy"
(see https://github.com/postgres/postgres/blob/2f9661311b83dc481fc19f6e3bda015392010a40/src/include/utils/portal.h#L89).
- Postgres has a concept of "command" type (these are things like
SELECTs, UPDATEs, INSERTs, etc,
see https://github.com/postgres/postgres/blob/1aac32df89eb19949050f6f27c268122833ad036/src/include/nodes/nodes.h#L672).

CRDB doesn't have these concepts, and without them any attempt to
simulate Postgres results in a very error-prone and brittle code.

Fixes: #48448.

Release note (bug fix): Previously, CockroachDB would erroneously
restart the execution of empty, unclosed portals after they have been
fully exhausted, and this has been fixed.

52098: sql: better distribute distinct processors r=yuzefovich a=yuzefovich

**sql: better distribute distinct processors**

The distinct processors are planned in two stages - first, a "local"
stage is planned on the same nodes as the previous stage, then,
a "global" stage is added. Previously, the global stage would be planned
on the gateway, and this commit changes that to make it distributed - by
planning "global" distinct processors on the same nodes as the "local"
ones and connecting them via a hash router hashing the distinct columns.

Release note: None

**sql: implement ConstructDistinct in the new factory**

Addresses: #47473.

Release note: None

52358: engine: set the MVCC timestamp on reads due to historical intents r=ajwerner a=ajwerner

Before this commit, we'd return a zero-value MVCC timestamp when reading an
intent from the intent history. This was problematic because elsewhere in the
code we assume that we will always get a non-zero MVCC timestamp when a read
returns a value. This is especially bizarre given that a read of the latest
intent will return its write timestamp.

The semantics here are such that we'll return the timestamp of the MVCC
metadata for the row. I think this is the most reasonable thing to do as
that timestamp ought to reflect the timestamp we return when we return the
current intent and furthermore is the only timestamp we really have around.
We could return the transactions current read or write timestamp but those
both seem like worse choices.

It's also worth noting that in the case where we need to read a non-zero
value, we don't really care what that value is and the fact that we are
reading this intent itself is somewhat suspect. That being said, I still
think we should make this change in addition to any change made to prevent
the code referenced in #49266 from needing it.

Fixes #49266.
Informs #50102.

Release note: None

52384: sql: properly reset extraTxnState in COPY r=ajwerner a=ajwerner

Apparently we support some sort of COPY protocol that I know nothing about.
We allow operations in that protocol in the open state and in the noTxn state
in the connExecutor. In the noTxn state we let the `copyMachine` handle its
transaction lifecycle all on its own. In that case, we also hand have the
`connExecutor` in a fresh state when calling `execCopyIn()`. During the
execution of `COPY`, it seems like sometime we can pick up table descriptor
leases. In the noTxn case we'd like to drop those leases and generally leave
the executor in the fresh state in which it was handed to us. To deal with
that, we call `resetExtraTxnState` before returning in the happy noTxn case.

Fixes #52065.

Release note (bug fix): Fixed a bug when using the COPY protocol which could
prevent schema changes for up to 5 minutes.

Co-authored-by: Yahor Yuzefovich <yahor@cockroachlabs.com>
Co-authored-by: Andrew Werner <ajwerner@cockroachlabs.com>
@craig craig bot closed this as completed in 55467a8 Aug 6, 2020
ajwerner added a commit to ajwerner/cockroach that referenced this issue Sep 29, 2020
Before this commit, we'd return a zero-value MVCC timestamp when reading an
intent from the intent history. This was problematic because elsewhere in the
code we assume that we will always get a non-zero MVCC timestamp when a read
returns a value. This is especially bizarre given that a read of the latest
intent will return its write timestamp.

The semantics here are such that we'll return the timestamp of the MVCC
metadata for the row. I think this is the most reasonable thing to do as
that timestamp ought to reflect the timestamp we return when we return the
current intent and furthermore is the only timestamp we really have around.
We could return the transactions current read or write timestamp but those
both seem like worse choices.

It's also worth noting that in the case where we need to read a non-zero
value, we don't really care what that value is and the fact that we are
reading this intent itself is somewhat suspect. That being said, I still
think we should make this change in addition to any change made to prevent
the code referenced in cockroachdb#49266 from needing it.

Fixes cockroachdb#49266.
Informs cockroachdb#50102.

Release note: None
ajwerner added a commit to ajwerner/cockroach that referenced this issue Sep 29, 2020
Before this commit, we'd return a zero-value MVCC timestamp when reading an
intent from the intent history. This was problematic because elsewhere in the
code we assume that we will always get a non-zero MVCC timestamp when a read
returns a value. This is especially bizarre given that a read of the latest
intent will return its write timestamp.

The semantics here are such that we'll return the timestamp of the MVCC
metadata for the row. I think this is the most reasonable thing to do as
that timestamp ought to reflect the timestamp we return when we return the
current intent and furthermore is the only timestamp we really have around.
We could return the transactions current read or write timestamp but those
both seem like worse choices.

It's also worth noting that in the case where we need to read a non-zero
value, we don't really care what that value is and the fact that we are
reading this intent itself is somewhat suspect. That being said, I still
think we should make this change in addition to any change made to prevent
the code referenced in cockroachdb#49266 from needing it.

Fixes cockroachdb#49266.
Informs cockroachdb#50102.

Release note: None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
C-bug Code not up to spec/doc, specs & docs deemed correct. Solution expected to change code/behavior. O-sentry Originated from an in-the-wild panic report.
Projects
None yet
Development

Successfully merging a pull request may close this issue.

5 participants