diff --git a/protocols/client/client.go b/protocols/client/client.go index fb04cbab2..aac0dd1a2 100644 --- a/protocols/client/client.go +++ b/protocols/client/client.go @@ -9,6 +9,7 @@ package client import ( "bufio" "context" + "errors" "fmt" "net" "net/url" @@ -400,6 +401,7 @@ func HybridVSockDialer(sock string, timeout time.Duration) (net.Conn, error) { } dialFunc := func() (net.Conn, error) { + handshakeTimeout := 10 * time.Second conn, err := net.DialTimeout("unix", udsPath, timeout) if err != nil { return nil, err @@ -418,26 +420,41 @@ func HybridVSockDialer(sock string, timeout time.Duration) (net.Conn, error) { return nil, err } - // A trivial handshake is included in the host-initiated vsock connection protocol. - // It looks like this: - // - [host] CONNECT - // - [guest/success] OK - reader := bufio.NewReader(conn) - response, err := reader.ReadString('\n') - if err != nil { - conn.Close() - agentClientLog.WithField("Error", err).Debug("HybridVsock trivial handshake failed") - // for now, we temporarily rely on the backoff strategy from GRPC for more stable CI. + errChan := make(chan error) + + go func() { + reader := bufio.NewReader(conn) + response, err := reader.ReadString('\n') + if err != nil { + errChan <- err + return + } + + agentClientLog.WithField("response", response).Debug("HybridVsock trivial handshake") + + if strings.Contains(response, "OK") { + errChan <- nil + } else { + errChan <- errors.New("HybridVsock trivial handshake failed with malformed response code") + } + }() + + select { + case err = <-errChan: + if err != nil { + conn.Close() + agentClientLog.WithField("Error", err).Debug("HybridVsock trivial handshake failed") + return nil, err + + } return conn, nil - } else if !strings.Contains(response, "OK") { + case <-time.After(handshakeTimeout): + // Timeout: kernel vsock implementation has a race condition, where no response is given + // Instead of waiting forever for a response, timeout after a fair amount of time. + // See: https://lore.kernel.org/netdev/668b0eda8823564cd604b1663dc53fbaece0cd4e.camel@intel.com/ conn.Close() - agentClientLog.WithField("response", response).Debug("HybridVsock trivial handshake failed with malformd response code") - // for now, we temporarily rely on the backoff strategy from GRPC for more stable CI. - return conn, nil + return nil, errors.New("timeout waiting for hybrid vsocket handshake") } - agentClientLog.WithField("response", response).Debug("HybridVsock trivial handshake") - - return conn, nil } timeoutErr := grpcStatus.Errorf(codes.DeadlineExceeded, "timed out connecting to hybrid vsocket %s", sock)