Skip to content

Commit

Permalink
distsql: marshall the whole pErr cause in retryable errors
Browse files Browse the repository at this point in the history
We need the OriginNode from it, and also the clock signal. The
OriginNode is necessary for ReadWithinUncertaintyInterval errors;
otherwise we were failing to find the required updated timestamp when
restarting the txn.
  • Loading branch information
andreimatei committed Apr 28, 2017
1 parent 7cf5509 commit 72fa944
Show file tree
Hide file tree
Showing 10 changed files with 184 additions and 325 deletions.
28 changes: 16 additions & 12 deletions pkg/internal/client/txn.go
Original file line number Diff line number Diff line change
Expand Up @@ -961,31 +961,35 @@ func firstWriteIndex(ba roachpb.BatchRequest) (int, *roachpb.Error) {
// the txn. If the error is not a RetryableTxnError, then this is a no-op. For a
// retryable error, the Transaction proto is either initialized with the updated
// proto from the error, or a new Transaction proto is initialized.
func (txn *Txn) UpdateStateOnRemoteRetryableErr(
ctx context.Context, retryErr roachpb.DistSQLRetryableTxnError,
) {
func (txn *Txn) UpdateStateOnRemoteRetryableErr(ctx context.Context, pErr roachpb.Error) {
txn.mu.Lock()
defer txn.mu.Unlock()

if pErr.TransactionRestart == roachpb.TransactionRestart_NONE {
log.Fatalf(ctx, "unexpected non-retryable error: %s", pErr)
}
if pErr.GetTxn() == nil {
// DistSQL requests (like all SQL requests) are always supposed to be done
// in a transaction.
log.Fatalf(ctx, "unexpected retryable error with no txn ran through DistSQL: %s", pErr)
}

// Assert that the TxnCoordSender doesn't have any state for this transaction
// (and it shouldn't, since DistSQL isn't supposed to do any works in
// transaction that had performed writes and hence started being tracked). If
// the TxnCoordSender were to have state, it'd be a bad thing that we're not
// updating it.
if retryErr.TxnID != nil {
if _, ok := txn.db.GetSender().(SenderWithDistSQLBackdoor).GetTxnState(*retryErr.TxnID); ok {
log.Fatalf(ctx, "unexpected state in TxnCoordSender for transaction in error: %s", retryErr)
// TODO(andrei): remove nil check once #15024 is merged.
if txnID := pErr.GetTxn().ID; txnID != nil {
if _, ok := txn.db.GetSender().(SenderWithDistSQLBackdoor).GetTxnState(*txnID); ok {
log.Fatalf(ctx, "unexpected state in TxnCoordSender for transaction in error: %s", pErr)
}
}

// Reconstruct a pErr suitable for a roachpb.PrepareTransactionForRetry()
// call.
pErr := roachpb.NewErrorWithTxn(retryErr.Cause.GetValue().(roachpb.ErrorDetailInterface), retryErr.Transaction)

// Emulate the processing that the TxnCoordSender would have done on this
// error.
newTxn := roachpb.PrepareTransactionForRetry(ctx, pErr, txn.mu.UserPriority)
newErr := roachpb.NewHandledRetryableTxnError(pErr.Message, retryErr.TxnID, newTxn)
newTxn := roachpb.PrepareTransactionForRetry(ctx, &pErr, txn.mu.UserPriority)
newErr := roachpb.NewHandledRetryableTxnError(pErr.Message, pErr.GetTxn().ID, newTxn)

txn.updateStateOnRetryableErrLocked(
ctx, *newErr,
Expand Down
5 changes: 3 additions & 2 deletions pkg/roachpb/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -1008,8 +1008,9 @@ func PrepareTransactionForRetry(ctx context.Context, pErr *Error, pri UserPriori
ts, ok := txn.GetObservedTimestamp(pErr.OriginNode)
if !ok {
log.Fatalf(ctx,
"missing observed timestamp for node %d found on uncertainty restart",
pErr.OriginNode)
"missing observed timestamp for node %d found on uncertainty restart. "+
"err: %s. txn: %s. Observed timestamps: %s",
pErr.OriginNode, pErr, txn, txn.ObservedTimestamps)
}
txn.Timestamp.Forward(ts)
case *TransactionPushError:
Expand Down
16 changes: 2 additions & 14 deletions pkg/roachpb/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (
)

func (e *DistSQLRetryableTxnError) Error() string {
return e.Msg
return e.PErr.Message
}

var _ error = &DistSQLRetryableTxnError{}
Expand Down Expand Up @@ -129,20 +129,8 @@ func (e *Error) GoError() error {
}

if e.TransactionRestart != TransactionRestart_NONE {
var txnID *uuid.UUID

// TODO(andrei): Can e.GetTxn() be nil here? It can't be if the request that
// generated the error was sent through client.Txn. If client.Txn wasn't
// used, is it still possible to get retryable errors?
if e.GetTxn() != nil {
txnID = e.GetTxn().ID
}

return &DistSQLRetryableTxnError{
Cause: *e.Detail,
Msg: e.Message,
TxnID: txnID,
Transaction: e.GetTxn(),
PErr: *e,
}
}
return e.GetDetail()
Expand Down
Loading

0 comments on commit 72fa944

Please sign in to comment.