-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
VReplication: Guard against unsafe _vt.vreplication writes #14797
Changes from all commits
c1c2b15
0507f64
385e9fe
eefef60
2296b43
ba13692
8a922fd
a79e22c
3a4f2cc
f393fac
8369759
bc23f63
b67b75f
2f01db1
3c5e4d0
e3a4fcf
808308a
352d6be
d8e7dbb
0cf48fc
06bf409
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,7 +70,7 @@ func (tm *TabletManager) HandleRPCPanic(ctx context.Context, name string, args, | |
if *err != nil { | ||
// error case | ||
log.Warningf("TabletManager.%v(%v)(on %v from %v) error: %v", name, args, topoproto.TabletAliasString(tm.tabletAlias), from, (*err).Error()) | ||
*err = vterrors.Wrapf(*err, "TabletManager.%v on %v error: %v", name, topoproto.TabletAliasString(tm.tabletAlias), (*err).Error()) | ||
*err = vterrors.Wrapf(*err, "TabletManager.%v on %v", name, topoproto.TabletAliasString(tm.tabletAlias)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This removes an improper usage of wrapping which led to the error/cause being duplicated. |
||
} else { | ||
// success case | ||
log.Infof("TabletManager.%v(%v)(on %v from %v): %#v", name, args, topoproto.TabletAliasString(tm.tabletAlias), from, reply) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ package vreplication | |
|
||
import ( | ||
"fmt" | ||
"strings" | ||
|
||
"vitess.io/vitess/go/constants/sidecar" | ||
"vitess.io/vitess/go/vt/sqlparser" | ||
|
@@ -50,6 +51,80 @@ const ( | |
reshardingJournalQuery | ||
) | ||
|
||
// A comment directive that you can include in your VReplication write | ||
// statements if you want to bypass the safety checks that ensure you are | ||
// being selective. The full comment directive looks like this: | ||
// delete /*vt+ ALLOW_UNSAFE_VREPLICATION_WRITE */ from _vt.vreplication | ||
const AllowUnsafeWriteCommentDirective = "ALLOW_UNSAFE_VREPLICATION_WRITE" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ❤️ |
||
|
||
// Check that the given WHERE clause is using at least one of the specified | ||
// columns with an equality or in operator to ensure that it is being | ||
// properly selective and not unintentionally going to potentially affect | ||
// multiple workflows. | ||
// The engine's exec function -- used by the VReplicationExec RPC -- should | ||
// provide guardrails for data changing statements and if the user wants get | ||
// around them they can e.g. use the ExecuteFetchAsDba RPC. | ||
// If you as a developer truly do want to affect multiple workflows, you can | ||
// add a comment directive using the AllowUnsafeWriteCommentDirective constant. | ||
var isSelective = func(where *sqlparser.Where, columns ...*sqlparser.ColName) bool { | ||
if where == nil { | ||
return false | ||
} | ||
if len(columns) == 0 { | ||
return true | ||
} | ||
selective := false | ||
_ = sqlparser.Walk(func(node sqlparser.SQLNode) (kontinue bool, err error) { | ||
switch node := node.(type) { | ||
case *sqlparser.ComparisonExpr: | ||
column, ok := node.Left.(*sqlparser.ColName) | ||
if !ok { | ||
return true, nil | ||
} | ||
wantedColumn := false | ||
for i := range columns { | ||
if columns[i].Equal(column) { | ||
wantedColumn = true | ||
break | ||
} | ||
} | ||
// If we found a desired column, check that it is being used with an | ||
// equality operator OR an in clause, logically being equal to any | ||
// of N things. | ||
if wantedColumn && | ||
(node.Operator == sqlparser.EqualOp || node.Operator == sqlparser.InOp) { | ||
selective = true // This is a safe statement | ||
return false, nil // We can stop walking | ||
} | ||
default: | ||
} | ||
return true, nil | ||
}, where) | ||
return selective | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice one! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this only limited to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I feel like it's probably a good thing to expand over time, but at this point vreplication.Engine.Exec() is the critical one and what's done here. |
||
|
||
// tableSelectiveColumns is a map that can be used to declare | ||
// what selective columns should be used (one or more) in queries | ||
// against a table. | ||
var tableSelectiveColumns = map[string][]*sqlparser.ColName{ | ||
vreplicationTableName: { | ||
{Name: sqlparser.NewIdentifierCI("id")}, | ||
{Name: sqlparser.NewIdentifierCI("workflow")}, | ||
}, | ||
} | ||
|
||
// columnsAsCSV returns a comma-separated list of column names. | ||
func columnsAsCSV(columns []*sqlparser.ColName) string { | ||
if len(columns) == 0 { | ||
return "" | ||
} | ||
colsForError := make([]string, len(columns)) | ||
for i := range columns { | ||
colsForError[i] = columns[i].Name.String() | ||
} | ||
return strings.Join(colsForError, ", ") | ||
} | ||
|
||
// buildControllerPlan parses the input query and returns an appropriate plan. | ||
func buildControllerPlan(query string, parser *sqlparser.Parser) (*controllerPlan, error) { | ||
stmt, err := parser.Parse(query) | ||
|
@@ -163,7 +238,12 @@ func buildUpdatePlan(upd *sqlparser.Update) (*controllerPlan, error) { | |
opcode: reshardingJournalQuery, | ||
}, nil | ||
case vreplicationTableName: | ||
// no-op | ||
if upd.Comments == nil || upd.Comments.Directives() == nil || !upd.Comments.Directives().IsSet(AllowUnsafeWriteCommentDirective) { | ||
if safe := isSelective(upd.Where, tableSelectiveColumns[vreplicationTableName]...); !safe { | ||
return nil, fmt.Errorf("unsafe WHERE clause in update without the /*vt+ %s */ comment directive: %s; should be using = or in with at least one of the following columns: %s", | ||
AllowUnsafeWriteCommentDirective, sqlparser.String(upd.Where), columnsAsCSV(tableSelectiveColumns[vreplicationTableName])) | ||
} | ||
} | ||
default: | ||
return nil, fmt.Errorf("invalid table name: %s", tableName.Name.String()) | ||
} | ||
|
@@ -220,7 +300,12 @@ func buildDeletePlan(del *sqlparser.Delete) (*controllerPlan, error) { | |
opcode: reshardingJournalQuery, | ||
}, nil | ||
case vreplicationTableName: | ||
// no-op | ||
if del.Comments == nil || del.Comments.Directives() == nil || !del.Comments.Directives().IsSet(AllowUnsafeWriteCommentDirective) { | ||
if safe := isSelective(del.Where, tableSelectiveColumns[vreplicationTableName]...); !safe { | ||
return nil, fmt.Errorf("unsafe WHERE clause in delete without the /*vt+ %s */ comment directive: %s; should be using = or in with at least one of the following columns: %s", | ||
AllowUnsafeWriteCommentDirective, sqlparser.String(del.Where), columnsAsCSV(tableSelectiveColumns[vreplicationTableName])) | ||
} | ||
} | ||
default: | ||
return nil, fmt.Errorf("invalid table name: %s", tableName.Name.String()) | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this is an issue in CI is just a 3 second retry enough?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's only been seen in the CI AFAIK, and so far 3 retries with a 1 second delay seems to have been enough (we've done essentially the same thing in several places now).