Skip to content

Commit

Permalink
cluster: check region info for existing cluster (#1126)
Browse files Browse the repository at this point in the history
* cluster/api: add region info check API

* cluster/check: query region info from PD for existing clusters
  • Loading branch information
AstroProfundis authored Feb 4, 2021
1 parent 5876ac3 commit 1d4cec5
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 6 deletions.
54 changes: 53 additions & 1 deletion components/cluster/command/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@ import (
"path"
"path/filepath"
"strings"
"time"

"github.com/fatih/color"
"github.com/joomcode/errorx"
perrs "github.com/pingcap/errors"
"github.com/pingcap/tiup/pkg/cliutil"
"github.com/pingcap/tiup/pkg/cluster/api"
"github.com/pingcap/tiup/pkg/cluster/ctxt"
"github.com/pingcap/tiup/pkg/cluster/executor"
operator "github.com/pingcap/tiup/pkg/cluster/operation"
Expand Down Expand Up @@ -115,7 +117,17 @@ conflict checks with other clusters`,
}
}

return checkSystemInfo(sshConnProps, &topo, &gOpt, &opt)
if err := checkSystemInfo(sshConnProps, &topo, &gOpt, &opt); err != nil {
return err
}

if !opt.existCluster {
return nil
}
// following checks are all for existing cluster

// check PD status
return checkRegionsInfo(args[0], &topo, &gOpt)
},
}

Expand All @@ -130,6 +142,7 @@ conflict checks with other clusters`,
cmd.Flags().BoolVar(&opt.opr.EnableDisk, "enable-disk", false, "Enable disk IO (fio) check")
cmd.Flags().BoolVar(&opt.applyFix, "apply", false, "Try to fix failed checks")
cmd.Flags().BoolVar(&opt.existCluster, "cluster", false, "Check existing cluster, the input is a cluster name.")
cmd.Flags().Uint64Var(&gOpt.APITimeout, "api-timeout", 10, "Timeout in seconds when querying PD APIs.")

return cmd
}
Expand Down Expand Up @@ -502,3 +515,42 @@ func fixFailedChecks(host string, res *operator.CheckResult, t *task.Builder) (s
}
return msg, nil
}

// checkRegionsInfo asks PD how many regions are in each of the peer states
// listed below and logs a warning for every state that has at least one
// region.
//
// Unhealthy regions are reported through the log only: the returned error is
// non-nil just when the TLS config cannot be built or a PD query fails.
func checkRegionsInfo(clusterName string, topo *spec.Specification, gOpt *operator.Options) error {
	log.Infof("Checking region status of the cluster %s...", clusterName)

	certDir := tidbSpec.Path(clusterName, spec.TLSCertKeyDir)
	tlsCfg, err := topo.TLSConfig(certDir)
	if err != nil {
		return err
	}

	// gOpt.APITimeout is expressed in seconds.
	timeout := time.Duration(gOpt.APITimeout) * time.Second
	client := api.NewPDClient(topo.GetPDList(), timeout, tlsCfg)

	// Region states that count as "not fully healthy".
	watchedStates := [...]string{"miss-peer", "pending-peer"}

	unhealthyStates := 0
	for i := range watchedStates {
		state := watchedStates[i]

		regions, err := client.CheckRegion(state)
		if err != nil {
			return err
		}
		if regions.Count <= 0 {
			continue
		}

		unhealthyStates++
		summary := color.YellowString("%d %s", regions.Count, state)
		log.Warnf("Regions are not fully healthy: %s", summary)
	}

	if unhealthyStates == 0 {
		log.Infof("All regions are healthy.")
		return nil
	}
	log.Warnf("Please fix unhealthy regions before other operations.")
	return nil
}
28 changes: 23 additions & 5 deletions pkg/cluster/api/pdapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ func (pc *PDClient) GetURL(addr string) string {
return fmt.Sprintf("%s://%s", httpPrefix, addr)
}

// pdEvictLeaderName is the name of the evict-leader scheduler.
const pdEvictLeaderName = "evict-leader-scheduler"

// nolint (some is unused now)
var (
pdPingURI = "pd/ping"
Expand All @@ -77,6 +82,7 @@ var (
pdLeaderTransferURI = "pd/api/v1/leader/transfer"
pdConfigReplicate = "pd/api/v1/config/replicate"
pdConfigSchedule = "pd/api/v1/config/schedule"
pdRegionsCheckURI = "pd/api/v1/regions/check"
)

func tryURLs(endpoints []string, f func(endpoint string) ([]byte, error)) ([]byte, error) {
Expand Down Expand Up @@ -359,11 +365,6 @@ func (pc *PDClient) EvictPDLeader(retryOpt *utils.RetryOption) error {
return nil
}

const (
// pdEvictLeaderName is the name of the evict-leader scheduler.
pdEvictLeaderName = "evict-leader-scheduler"
)

// pdSchedulerRequest is the request body when evicting store leader
type pdSchedulerRequest struct {
Name string `json:"name"`
Expand Down Expand Up @@ -704,3 +705,20 @@ func (pc *PDClient) GetTiKVLabels() (map[string]map[string]string, error) {
func (pc *PDClient) UpdateScheduleConfig(body io.Reader) error {
// Delegates to updateConfig, passing pdConfigSchedule as the config URI.
return pc.updateConfig(body, pdConfigSchedule)
}

// CheckRegion queries PD for the regions that are in the given state.
//
// state is appended as a path segment to the regions check URI (for example
// "miss-peer" or "pending-peer"). The request is issued through tryURLs over
// the endpoints returned by getEndpoints, and the response body is decoded as
// JSON into a RegionsInfo.
//
// The returned pointer is never nil. When err is non-nil it points to a
// zero-valued RegionsInfo and must not be interpreted as a PD response.
func (pc *PDClient) CheckRegion(state string) (*pdserverapi.RegionsInfo, error) {
	uri := pdRegionsCheckURI + "/" + state
	endpoints := pc.getEndpoints(uri)
	regionsInfo := pdserverapi.RegionsInfo{}

	_, err := tryURLs(endpoints, func(endpoint string) ([]byte, error) {
		body, err := pc.httpClient.Get(endpoint)
		if err != nil {
			return body, err
		}

		// Decode into a fresh value on every attempt. json.Unmarshal may leave
		// the target partially filled when it fails, so decoding straight into
		// the shared result would let a failed attempt leak fields into the
		// result of a later attempt or into the value returned with the error.
		var decoded pdserverapi.RegionsInfo
		if err := json.Unmarshal(body, &decoded); err != nil {
			return body, err
		}
		regionsInfo = decoded
		return body, nil
	})
	return &regionsInfo, err
}

0 comments on commit 1d4cec5

Please sign in to comment.