Skip to content

Commit

Permalink
fix: update etcd client errors, print etcd join failures
Browse files Browse the repository at this point in the history
Wrap etcd client errors with context so it is clear where each error comes
from. Also print errors to the console while etcd is trying to join the
cluster - this is invaluable for understanding why etcd fails to join.

Signed-off-by: Andrey Smirnov <smirnov.andrey@gmail.com>
  • Loading branch information
smira authored and talos-bot committed Apr 15, 2021
1 parent 0bd8b0e commit 6cb266e
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 14 deletions.
18 changes: 8 additions & 10 deletions internal/app/machined/pkg/system/services/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ func addMember(ctx context.Context, r runtime.Runtime, addrs []string, name stri

list, err := client.MemberList(ctx)
if err != nil {
return nil, 0, err
return nil, 0, fmt.Errorf("error getting etcd member list: %w", err)
}

for _, member := range list.Members {
Expand All @@ -232,31 +232,29 @@ func addMember(ctx context.Context, r runtime.Runtime, addrs []string, name stri

add, err := client.MemberAdd(ctx, addrs)
if err != nil {
return nil, 0, err
return nil, 0, fmt.Errorf("error adding member: %w", err)
}

list, err = client.MemberList(ctx)
if err != nil {
return nil, 0, err
return nil, 0, fmt.Errorf("error getting second etcd member list: %w", err)
}

return list, add.Member.ID, nil
}

func buildInitialCluster(ctx context.Context, r runtime.Runtime, name, ip string) (initial string, err error) {
err = retry.Constant(10*time.Minute, retry.WithUnits(3*time.Second), retry.WithJitter(time.Second)).Retry(func() error {
err = retry.Constant(10*time.Minute,
retry.WithUnits(3*time.Second),
retry.WithJitter(time.Second),
retry.WithErrorLogging(true),
).RetryWithContext(ctx, func(ctx context.Context) error {
var (
peerAddrs = []string{"https://" + net.FormatAddress(ip) + ":2380"}
resp *clientv3.MemberListResponse
id uint64
)

select {
case <-ctx.Done():
return retry.UnexpectedError(ctx.Err())
default:
}

attemptCtx, attemptCtxCancel := context.WithTimeout(ctx, 30*time.Second)
defer attemptCtxCancel()

Expand Down
8 changes: 4 additions & 4 deletions internal/pkg/etcd/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ func NewClient(endpoints []string) (client *Client, err error) {

tlsConfig, err := tlsInfo.ClientConfig()
if err != nil {
return nil, err
return nil, fmt.Errorf("error building etcd client TLS config: %w", err)
}

c, err := clientv3.New(clientv3.Config{
Expand All @@ -56,7 +56,7 @@ func NewClient(endpoints []string) (client *Client, err error) {
TLS: tlsConfig,
})
if err != nil {
return nil, err
return nil, fmt.Errorf("error building etcd client: %w", err)
}

return &Client{Client: c}, nil
Expand All @@ -72,13 +72,13 @@ func NewLocalClient() (client *Client, err error) {
func NewClientFromControlPlaneIPs(ctx context.Context, creds *x509.PEMEncodedCertificateAndKey, endpoint *url.URL) (client *Client, err error) {
h, err := kubernetes.NewTemporaryClientFromPKI(creds, endpoint)
if err != nil {
return nil, err
return nil, fmt.Errorf("error building kubernetes client from PKI: %w", err)
}

var endpoints []string

if endpoints, err = h.MasterIPs(ctx); err != nil {
return nil, err
return nil, fmt.Errorf("error getting kubernetes endpoints: %w", err)
}

// Etcd expects host:port format.
Expand Down

0 comments on commit 6cb266e

Please sign in to comment.