Skip to content
This repository has been archived by the owner on May 12, 2021. It is now read-only.

Commit

Permalink
protocols: client: Add timeout for hybrid vsock handshake
Browse files Browse the repository at this point in the history
When the client tries to connect, a race condition can sometimes
occur between the bind and listen calls on the agent's
vsock side.

This will block the hypervisor waiting for a response and,
as a consequence, the agent client where it checks for an OK
response.

This case needs to be handled by the guest kernel, see
https://lore.kernel.org/netdev/668b0eda8823564cd604b1663dc53fbaece0cd4e.camel@intel.com/

As an extra protection, make the agent client time out if no OK response
is given. The response should be quick, so it is OK to wait a few seconds
and then time out.

This also allows returning an error from the dialer function so that retries
do not fall back on the gRPC retry, making retries faster.

Fixes: #372

Signed-off-by: Jose Carlos Venegas Munoz <jose.carlos.venegas.munoz@intel.com>
  • Loading branch information
jcvenegas committed Feb 20, 2020
1 parent 36b37f6 commit 8f57eda
Showing 1 changed file with 34 additions and 17 deletions.
51 changes: 34 additions & 17 deletions protocols/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ package client
import (
"bufio"
"context"
"errors"
"fmt"
"net"
"net/url"
Expand Down Expand Up @@ -400,6 +401,7 @@ func HybridVSockDialer(sock string, timeout time.Duration) (net.Conn, error) {
}

dialFunc := func() (net.Conn, error) {
handshakeTimeout := 10 * time.Second
conn, err := net.DialTimeout("unix", udsPath, timeout)
if err != nil {
return nil, err
Expand All @@ -418,26 +420,41 @@ func HybridVSockDialer(sock string, timeout time.Duration) (net.Conn, error) {
return nil, err
}

// A trivial handshake is included in the host-initiated vsock connection protocol.
// It looks like this:
// - [host] CONNECT <port><LF>
// - [guest/success] OK <assigned_host_port><LF>
reader := bufio.NewReader(conn)
response, err := reader.ReadString('\n')
if err != nil {
conn.Close()
agentClientLog.WithField("Error", err).Debug("HybridVsock trivial handshake failed")
// for now, we temporarily rely on the backoff strategy from GRPC for more stable CI.
errChan := make(chan error)

go func() {
reader := bufio.NewReader(conn)
response, err := reader.ReadString('\n')
if err != nil {
errChan <- err
return
}

agentClientLog.WithField("response", response).Debug("HybridVsock trivial handshake")

if strings.Contains(response, "OK") {
errChan <- nil
} else {
errChan <- errors.New("HybridVsock trivial handshake failed with malformed response code")
}
}()

select {
case err = <-errChan:
if err != nil {
conn.Close()
agentClientLog.WithField("Error", err).Debug("HybridVsock trivial handshake failed")
return nil, err

}
return conn, nil
} else if !strings.Contains(response, "OK") {
case <-time.After(handshakeTimeout):
// Timeout: kernel vsock implementation has a race condition, where no response is given
// Instead of waiting forever for a response, timeout after a fair amount of time.
// See: https://lore.kernel.org/netdev/668b0eda8823564cd604b1663dc53fbaece0cd4e.camel@intel.com/
conn.Close()
agentClientLog.WithField("response", response).Debug("HybridVsock trivial handshake failed with malformd response code")
// for now, we temporarily rely on the backoff strategy from GRPC for more stable CI.
return conn, nil
return nil, errors.New("timeout waiting for hybrid vsocket handshake")
}
agentClientLog.WithField("response", response).Debug("HybridVsock trivial handshake")

return conn, nil
}

timeoutErr := grpcStatus.Errorf(codes.DeadlineExceeded, "timed out connecting to hybrid vsocket %s", sock)
Expand Down

0 comments on commit 8f57eda

Please sign in to comment.