Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[vtctl|vtctldserver] List/Get Tablets timeouts #7715

Merged
merged 12 commits into from
Mar 22, 2021
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 34 additions & 7 deletions go/cmd/vtctldclient/internal/command/tablets.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"vitess.io/vitess/go/cmd/vtctldclient/cli"
"vitess.io/vitess/go/vt/topo/topoproto"

topodatapb "vitess.io/vitess/go/vt/proto/topodata"
vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata"
)

Expand All @@ -49,7 +50,7 @@ var (
}
// GetTablets makes a GetTablets gRPC call to a vtctld.
GetTablets = &cobra.Command{
Use: "GetTablets [--cell $c1, ...] [--keyspace $ks [--shard $shard]]",
Use: "GetTablets [--strict] [{--cell $c1 [--cell $c2 ...], --keyspace $ks [--shard $shard], --tablet-alias $alias}]",
Args: cobra.NoArgs,
RunE: commandGetTablets,
}
Expand Down Expand Up @@ -149,7 +150,10 @@ var getTabletsOptions = struct {
Keyspace string
Shard string

TabletAliasStrings []string

Format string
Strict bool
}{}

func commandGetTablets(cmd *cobra.Command, args []string) error {
Expand All @@ -161,16 +165,37 @@ func commandGetTablets(cmd *cobra.Command, args []string) error {
return fmt.Errorf("invalid output format, got %s", getTabletsOptions.Format)
}

var aliases []*topodatapb.TabletAlias

if len(getTabletsOptions.TabletAliasStrings) > 0 {
switch {
case getTabletsOptions.Keyspace != "":
return fmt.Errorf("--keyspace (= %s) cannot be passed when using --tablet-alias (= %v)", getTabletsOptions.Keyspace, getTabletsOptions.TabletAliasStrings)
case getTabletsOptions.Shard != "":
return fmt.Errorf("--shard (= %s) cannot be passed when using --tablet-alias (= %v)", getTabletsOptions.Shard, getTabletsOptions.TabletAliasStrings)
case len(getTabletsOptions.Cells) > 0:
return fmt.Errorf("--cell (= %v) cannot be passed when using --tablet-alias (= %v)", getTabletsOptions.Cells, getTabletsOptions.TabletAliasStrings)
}

var err error
aliases, err = cli.TabletAliasesFromPosArgs(getTabletsOptions.TabletAliasStrings)
if err != nil {
return err
}
}

if getTabletsOptions.Keyspace == "" && getTabletsOptions.Shard != "" {
return fmt.Errorf("--shard (= %s) cannot be passed without also passing --keyspace", getTabletsOptions.Shard)
}

cli.FinishedParsing(cmd)

resp, err := client.GetTablets(commandCtx, &vtctldatapb.GetTabletsRequest{
Cells: getTabletsOptions.Cells,
Keyspace: getTabletsOptions.Keyspace,
Shard: getTabletsOptions.Shard,
TabletAliases: aliases,
Cells: getTabletsOptions.Cells,
Keyspace: getTabletsOptions.Keyspace,
Shard: getTabletsOptions.Shard,
Strict: getTabletsOptions.Strict,
})
if err != nil {
return err
Expand Down Expand Up @@ -202,9 +227,11 @@ func init() {

Root.AddCommand(GetTablet)

GetTablets.Flags().StringSliceVarP(&getTabletsOptions.Cells, "cell", "c", nil, "list of cells to filter tablets by")
GetTablets.Flags().StringVarP(&getTabletsOptions.Keyspace, "keyspace", "k", "", "keyspace to filter tablets by")
GetTablets.Flags().StringVarP(&getTabletsOptions.Shard, "shard", "s", "", "shard to filter tablets by")
GetTablets.Flags().StringSliceVarP(&getTabletsOptions.TabletAliasStrings, "tablet-alias", "t", nil, "List of tablet aliases to filter by")
GetTablets.Flags().StringSliceVarP(&getTabletsOptions.Cells, "cell", "c", nil, "List of cells to filter tablets by")
GetTablets.Flags().StringVarP(&getTabletsOptions.Keyspace, "keyspace", "k", "", "Keyspace to filter tablets by")
GetTablets.Flags().StringVarP(&getTabletsOptions.Shard, "shard", "s", "", "Shard to filter tablets by")
GetTablets.Flags().StringVar(&getTabletsOptions.Format, "format", "awk", "Output format to use; valid choices are (json, awk)")
GetTablets.Flags().BoolVar(&getTabletsOptions.Strict, "strict", false, "Require all cells to return successful tablet data. Without --strict, tablet listings may be partial.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

chef kiss

Root.AddCommand(GetTablets)
}
461 changes: 287 additions & 174 deletions go/vt/proto/vtctldata/vtctldata.pb.go

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions go/vt/topo/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package topo

import (
"errors"
"fmt"
)

Expand Down Expand Up @@ -80,8 +81,15 @@ func (e Error) Error() string {

// IsErrType returns true if the error has the specified ErrorCode.
func IsErrType(err error, code ErrorCode) bool {
var e Error

if errors.As(err, &e) {
return e.code == code
}

if e, ok := err.(Error); ok {
return e.code == code
}

return false
}
135 changes: 104 additions & 31 deletions go/vt/vtctl/grpcvtctldserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -628,12 +628,49 @@ func (s *VtctldServer) GetTablets(ctx context.Context, req *vtctldatapb.GetTable
}
}

if req.Keyspace != "" && req.Shard != "" {
tabletMap, err := s.ts.GetTabletMapForShard(ctx, req.Keyspace, req.Shard)
// Create a context for our per-cell RPCs, with a timeout upper-bounded at
// the RemoteOperationTimeout.
//
// Per-cell goroutines may also cancel this context if they fail and the
// request specified Strict=true to allow us to fail faster.
ctx, cancel := context.WithTimeout(ctx, *topo.RemoteOperationTimeout)
defer cancel()

var (
tabletMap map[string]*topo.TabletInfo
err error
)

switch {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure to what extent this is a golang thing or a you thing, but I really like this (and other) uses of switch (instead of if/else, presumably). Going to osmose this pattern into our TypeScript more often. >:)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably a mix of both haha

case len(req.TabletAliases) > 0:
tabletMap, err = s.ts.GetTabletMap(ctx, req.TabletAliases)
if err != nil {
err = fmt.Errorf("GetTabletMap(%v) failed: %w", req.TabletAliases, err)
}
case req.Keyspace != "" && req.Shard != "":
tabletMap, err = s.ts.GetTabletMapForShard(ctx, req.Keyspace, req.Shard)
if err != nil {
err = fmt.Errorf("GetTabletMapForShard(%s, %s) failed: %w", req.Keyspace, req.Shard, err)
}
default:
// goto the req.Cells branch
tabletMap = nil
}

if err != nil {
switch {
case topo.IsErrType(err, topo.PartialResult):
if req.Strict {
return nil, err
}

log.Warningf("GetTablets encountered non-fatal error %s; continuing because Strict=false", err)
default:
return nil, err
}
}

if tabletMap != nil {
var trueMasterTimestamp time.Time
for _, ti := range tabletMap {
if ti.Type == topodatapb.TabletType_MASTER {
Expand Down Expand Up @@ -663,49 +700,85 @@ func (s *VtctldServer) GetTablets(ctx context.Context, req *vtctldatapb.GetTable
cells = c
}

var allTablets []*topodatapb.Tablet
var (
m sync.Mutex
wg sync.WaitGroup
rec concurrency.AllErrorRecorder
allTablets []*topo.TabletInfo
)

for _, cell := range cells {
tablets, err := topotools.GetAllTablets(ctx, s.ts, cell)
if err != nil {
return nil, err
}

// Collect true master term start times, and optionally filter out any
// tablets by keyspace according to the request.
masterTermStartTimes := map[string]time.Time{}
filteredTablets := make([]*topo.TabletInfo, 0, len(tablets))
wg.Add(1)

for _, tablet := range tablets {
if req.Keyspace != "" && tablet.Keyspace != req.Keyspace {
continue
}
go func(cell string) {
defer wg.Done()

key := tablet.Keyspace + "." + tablet.Shard
if v, ok := masterTermStartTimes[key]; ok {
if tablet.GetMasterTermStartTime().After(v) {
masterTermStartTimes[key] = tablet.GetMasterTermStartTime()
tablets, err := topotools.GetAllTablets(ctx, s.ts, cell)
if err != nil {
if req.Strict {
log.Infof("GetTablets got an error from cell %s: %s. Running in strict mode, so canceling other cell RPCs", cell, err)
cancel()
}
} else {
masterTermStartTimes[key] = tablet.GetMasterTermStartTime()

rec.RecordError(fmt.Errorf("GetAllTablets(cell = %s) failed: %w", cell, err))

return
}

filteredTablets = append(filteredTablets, tablet)
m.Lock()
defer m.Unlock()
allTablets = append(allTablets, tablets...)
}(cell)
}

wg.Wait()

if rec.HasErrors() {
if req.Strict {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Super minor point that you are welcome to ignore -- my $0.02 is that if req.Strict || len(rec.Errors) == len(cells) is slightly more readable (or at least, what I'd expect, since it took me a sec to realize the two ifs are doing the same thing).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not exactly :) (I ran into this issue when a test broke)

If len(cells) == 0, then len(rec.Errors) == len(cells) but rec.HasErrors() == false, which is why we have to be a little more verbose/redundant here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I misunderstood! I thought you were suggesting to replace the outer if rec.HasErrors() with the composite boolean, not to replace the two branches. You're totally right, and it's way easier to understand. Pushing up a fix! 😊

return nil, rec.Error()
}

if len(rec.Errors) == len(cells) {
return nil, rec.Error()
}
}

// Collect true master term start times, and optionally filter out any
// tablets by keyspace according to the request.
masterTermStartTimes := map[string]time.Time{}
filteredTablets := make([]*topo.TabletInfo, 0, len(allTablets))

// collect the tablets with adjusted master term start times. they've
// already been filtered by the above loop, so no keyspace filtering
// here.
for _, ti := range filteredTablets {
key := ti.Keyspace + "." + ti.Shard
adjustTypeForStalePrimary(ti, masterTermStartTimes[key])
for _, tablet := range allTablets {
if req.Keyspace != "" && tablet.Keyspace != req.Keyspace {
continue
}

allTablets = append(allTablets, ti.Tablet)
key := tablet.Keyspace + "." + tablet.Shard
if v, ok := masterTermStartTimes[key]; ok {
if tablet.GetMasterTermStartTime().After(v) {
masterTermStartTimes[key] = tablet.GetMasterTermStartTime()
}
} else {
masterTermStartTimes[key] = tablet.GetMasterTermStartTime()
}

filteredTablets = append(filteredTablets, tablet)
}

adjustedTablets := make([]*topodatapb.Tablet, len(filteredTablets))

// collect the tablets with adjusted master term start times. they've
// already been filtered by the above loop, so no keyspace filtering
// here.
for i, ti := range filteredTablets {
key := ti.Keyspace + "." + ti.Shard
adjustTypeForStalePrimary(ti, masterTermStartTimes[key])

adjustedTablets[i] = ti.Tablet
}

return &vtctldatapb.GetTabletsResponse{
Tablets: allTablets,
Tablets: adjustedTablets,
}, nil
}

Expand Down
Loading