Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kvm: recover from minikube-net network left over failures #9641

Merged
merged 2 commits into from
Nov 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions pkg/drivers/kvm/kvm.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ func (d *Driver) PreCommandCheck() error {
if err != nil {
return errors.Wrap(err, "error connecting to libvirt socket. Have you added yourself to the libvirtd group?")
}
defer conn.Close()

libVersion, err := conn.GetLibVersion()
if err != nil {
return errors.Wrap(err, "getting libvirt version")
Expand Down Expand Up @@ -240,14 +242,6 @@ func (d *Driver) Restart() error {

// Start a host
func (d *Driver) Start() (err error) {
// if somebody/something deleted the network in the meantime,
// we might need to recreate it. It's (nearly) a noop if the network exists.
log.Info("Creating network...")
err = d.createNetwork()
if err != nil {
return errors.Wrap(err, "creating network")
}

// this call ensures that all networks are active
log.Info("Ensuring networks are active...")
err = d.ensureNetwork()
Expand Down Expand Up @@ -490,3 +484,14 @@ func (d *Driver) undefineDomain(conn *libvirt.Connect, dom *libvirt.Domain) erro

return dom.Undefine()
}

// lvErr will return libvirt Error struct containing specific libvirt error code, domain, message and level
func lvErr(err error) libvirt.Error {
if err != nil {
if lverr, ok := err.(libvirt.Error); ok {
return lverr
}
return libvirt.Error{Code: libvirt.ERR_INTERNAL_ERROR, Message: "internal error"}
}
return libvirt.Error{Code: libvirt.ERR_OK, Message: ""}
}
105 changes: 91 additions & 14 deletions pkg/drivers/kvm/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@ import (
"io/ioutil"
"strings"
"text/template"
"time"

"github.com/docker/machine/libmachine/log"
libvirt "github.com/libvirt/libvirt-go"
"github.com/pkg/errors"
"k8s.io/minikube/pkg/util/retry"
)

// Replace with hardcoded range with CIDR
Expand All @@ -53,6 +55,7 @@ func setupNetwork(conn *libvirt.Connect, name string) error {
if err != nil {
return errors.Wrapf(err, "checking network %s", name)
}
defer func() { _ = n.Free() }()

// always ensure autostart is set on the network
autostart, err := n.GetAutostart()
Expand All @@ -75,7 +78,6 @@ func setupNetwork(conn *libvirt.Connect, name string) error {
return errors.Wrapf(err, "starting network %s", name)
}
}

return nil
}

Expand All @@ -99,8 +101,21 @@ func (d *Driver) ensureNetwork() error {

// Start the private network
log.Infof("Ensuring network %s is active", d.PrivateNetwork)
// retry once to recreate the network, but only if is not used by another minikube instance
if err := setupNetwork(conn, d.PrivateNetwork); err != nil {
return err
log.Debugf("Network %s is inoperable, will try to recreate it: %v", d.PrivateNetwork, err)
if err := d.deleteNetwork(); err != nil {
return errors.Wrapf(err, "deleting inoperable network %s", d.PrivateNetwork)
}
log.Debugf("Successfully deleted %s network", d.PrivateNetwork)
if err := d.createNetwork(); err != nil {
return errors.Wrapf(err, "recreating inoperable network %s", d.PrivateNetwork)
}
log.Debugf("Successfully recreated %s network", d.PrivateNetwork)
if err := setupNetwork(conn, d.PrivateNetwork); err != nil {
return err
}
log.Debugf("Successfully activated %s network", d.PrivateNetwork)
}

return nil
Expand All @@ -120,13 +135,16 @@ func (d *Driver) createNetwork() error {

// network: default
// It is assumed that the libvirt/kvm installation has already created this network
if _, err := conn.LookupNetworkByName(d.Network); err != nil {
netd, err := conn.LookupNetworkByName(d.Network)
if err != nil {
return errors.Wrapf(err, "network %s doesn't exist", d.Network)
}
defer func() { _ = netd.Free() }()

// network: private
// Only create the private network if it does not already exist
if _, err := conn.LookupNetworkByName(d.PrivateNetwork); err != nil {
netp, err := conn.LookupNetworkByName(d.PrivateNetwork)
if err != nil {
// create the XML for the private network from our networkTmpl
tmpl := template.Must(template.New("network").Parse(networkTmpl))
var networkXML bytes.Buffer
Expand All @@ -141,10 +159,26 @@ func (d *Driver) createNetwork() error {
}

// and finally create it
if err := network.Create(); err != nil {
log.Debugf("Trying to create network %s...", d.PrivateNetwork)
create := func() error {
if err := network.Create(); err != nil {
return err
}
active, err := network.IsActive()
if err == nil && active {
return nil
}
return errors.Errorf("retrying %v", err)
}
if err := retry.Local(create, 10*time.Second); err != nil {
return errors.Wrapf(err, "creating network %s", d.PrivateNetwork)
}
}
defer func() {
if netp != nil {
_ = netp.Free()
}
}()

return nil
}
Expand All @@ -163,13 +197,13 @@ func (d *Driver) deleteNetwork() error {
log.Debugf("Checking if network %s exists...", d.PrivateNetwork)
network, err := conn.LookupNetworkByName(d.PrivateNetwork)
if err != nil {
if libvirtErr, ok := err.(libvirt.Error); ok && libvirtErr.Code == libvirt.ERR_NO_NETWORK {
if lvErr(err).Code == libvirt.ERR_NO_NETWORK {
log.Warnf("Network %s does not exist. Skipping deletion", d.PrivateNetwork)
return nil
}

return errors.Wrapf(err, "failed looking for network %s", d.PrivateNetwork)
}
defer func() { _ = network.Free() }()
log.Debugf("Network %s exists", d.PrivateNetwork)

err = d.checkDomains(conn)
Expand All @@ -178,15 +212,58 @@ func (d *Driver) deleteNetwork() error {
}

// when we reach this point, it means it is safe to delete the network

// cannot destroy an inactive network - try to activate it first
log.Debugf("Trying to reactivate network %s first (if needed)...", d.PrivateNetwork)
activate := func() error {
active, err := network.IsActive()
if err == nil && active {
return nil
}
if err != nil {
return err
}
// inactive, try to activate
if err := network.Create(); err != nil {
return err
}
return errors.Errorf("needs confirmation") // confirm in the next cycle
}
if err := retry.Local(activate, 10*time.Second); err != nil {
log.Debugf("Reactivating network %s failed, will continue anyway...", d.PrivateNetwork)
}

log.Debugf("Trying to destroy network %s...", d.PrivateNetwork)
err = network.Destroy()
if err != nil {
return errors.Wrap(err, "network destroy")
destroy := func() error {
if err := network.Destroy(); err != nil {
return err
}
active, err := network.IsActive()
if err == nil && !active {
return nil
}
return errors.Errorf("retrying %v", err)
}
if err := retry.Local(destroy, 10*time.Second); err != nil {
return errors.Wrap(err, "destroying network")
}

log.Debugf("Trying to undefine network %s...", d.PrivateNetwork)
err = network.Undefine()
if err != nil {
return errors.Wrap(err, "network undefine")
undefine := func() error {
if err := network.Undefine(); err != nil {
return err
}
netp, err := conn.LookupNetworkByName(d.PrivateNetwork)
if netp != nil {
_ = netp.Free()
}
if lvErr(err).Code == libvirt.ERR_NO_NETWORK {
return nil
}
return errors.Errorf("retrying %v", err)
}
if err := retry.Local(undefine, 10*time.Second); err != nil {
return errors.Wrap(err, "undefining network")
}

return nil
Expand Down Expand Up @@ -272,7 +349,6 @@ func (d *Driver) lookupIP() (string, error) {
if err != nil {
return "", errors.Wrap(err, "getting connection and domain")
}

defer conn.Close()

libVersion, err := conn.GetLibVersion()
Expand All @@ -294,6 +370,7 @@ func (d *Driver) lookupIPFromStatusFile(conn *libvirt.Connect) (string, error) {
if err != nil {
return "", errors.Wrap(err, "looking up network by name")
}
defer func() { _ = network.Free() }()

bridge, err := network.GetBridgeName()
if err != nil {
Expand Down