-
Notifications
You must be signed in to change notification settings - Fork 881
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve load balancing scalability in swarm mode #2138
Changes from all commits
5111c24
3c4c5a7
dae02c5
07d3c5c
45dc468
c5629df
2166946
c47239d
fd7f30d
366b911
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,7 +40,7 @@ type Network interface { | |
CreateEndpoint(name string, options ...EndpointOption) (Endpoint, error) | ||
|
||
// Delete the network. | ||
Delete() error | ||
Delete(options ...NetworkDeleteOption) error | ||
|
||
// Endpoints returns the list of Endpoint(s) in this network. | ||
Endpoints() []Endpoint | ||
|
@@ -875,6 +875,28 @@ func (n *network) processOptions(options ...NetworkOption) { | |
} | ||
} | ||
|
||
type networkDeleteParams struct { | ||
rmLBEndpoint bool | ||
} | ||
|
||
// NetworkDeleteOption is a type for optional parameters to pass to the | ||
// network.Delete() function. | ||
type NetworkDeleteOption func(p *networkDeleteParams) | ||
|
||
// NetworkDeleteOptionRemoveLB informs a network.Delete() operation that should | ||
// remove the load balancer endpoint for this network. Note that the Delete() | ||
// method will automatically remove a load balancing endpoint for most networks | ||
// when the network is otherwise empty. However, this does not occur for some | ||
// networks. In particular, networks marked as ingress (which are supposed to | ||
// be more permanent than other overlay networks) won't automatically remove | ||
// the LB endpoint on Delete(). This method allows for explicit removal of | ||
// such networks provided there are no other endpoints present in the network. | ||
// If the network still has non-LB endpoints present, Delete() will not | ||
// remove the LB endpoint and will return an error. | ||
func NetworkDeleteOptionRemoveLB(p *networkDeleteParams) { | ||
p.rmLBEndpoint = true | ||
} | ||
|
||
func (n *network) resolveDriver(name string, load bool) (driverapi.Driver, *driverapi.Capability, error) { | ||
c := n.getController() | ||
|
||
|
@@ -938,11 +960,23 @@ func (n *network) driver(load bool) (driverapi.Driver, error) { | |
return d, nil | ||
} | ||
|
||
func (n *network) Delete() error { | ||
return n.delete(false) | ||
func (n *network) Delete(options ...NetworkDeleteOption) error { | ||
var params networkDeleteParams | ||
for _, opt := range options { | ||
opt(¶ms) | ||
} | ||
return n.delete(false, params.rmLBEndpoint) | ||
} | ||
|
||
func (n *network) delete(force bool) error { | ||
// This function gets called in 3 ways: | ||
// * Delete() -- (false, false) | ||
// remove if endpoint count == 0 or endpoint count == 1 and | ||
// there is a load balancer IP | ||
// * Delete(libnetwork.NetworkDeleteOptionRemoveLB) -- (false, true) | ||
// remove load balancer and network if endpoint count == 1 | ||
// * controller.networkCleanup() -- (true, true) | ||
// remove the network no matter what | ||
func (n *network) delete(force bool, rmLBEndpoint bool) error { | ||
n.Lock() | ||
c := n.ctrlr | ||
name := n.name | ||
|
@@ -957,10 +991,32 @@ func (n *network) delete(force bool) error { | |
return &UnknownNetworkError{name: name, id: id} | ||
} | ||
|
||
if len(n.loadBalancerIP) != 0 { | ||
endpoints := n.Endpoints() | ||
if force || (len(endpoints) == 1 && !n.ingress) { | ||
n.deleteLoadBalancerSandbox() | ||
// Only remove ingress on force removal or explicit LB endpoint removal | ||
if n.ingress && !force && !rmLBEndpoint { | ||
return &ActiveEndpointsError{name: n.name, id: n.id} | ||
} | ||
|
||
// Check that the network is empty | ||
var emptyCount uint64 | ||
if n.hasLoadBalancerEndpoint() { | ||
emptyCount = 1 | ||
} | ||
if !force && n.getEpCnt().EndpointCnt() > emptyCount { | ||
if n.configOnly { | ||
return types.ForbiddenErrorf("configuration network %q is in use", n.Name()) | ||
} | ||
return &ActiveEndpointsError{name: n.name, id: n.id} | ||
} | ||
|
||
if n.hasLoadBalancerEndpoint() { | ||
// If we got to this point, then the following must hold: | ||
// * force is true OR endpoint count == 1 | ||
if err := n.deleteLoadBalancerSandbox(); err != nil { | ||
if !force { | ||
return err | ||
} | ||
// continue deletion when force is true even on error | ||
logrus.Warnf("Error deleting load balancer sandbox: %v", err) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we used warnf but in the message it says Error. Is it just warning or error message ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe I was just keeping it consistent with the previous behavior which put it at the warning level. Presumably this behavior was because failure to remove may be due to the fact that it already got inadvertantly removed. In this case, the error is not fatal to the system since the network is being removed anyway. (see the comment above the warning). If this is the rationale, then warning level seems appropriate. The error message was, I think, the same, but we could change the wording in principle. |
||
} | ||
//Reload the network from the store to update the epcnt. | ||
n, err = c.getNetworkFromStore(id) | ||
|
@@ -969,12 +1025,10 @@ func (n *network) delete(force bool) error { | |
} | ||
} | ||
|
||
if !force && n.getEpCnt().EndpointCnt() != 0 { | ||
if n.configOnly { | ||
return types.ForbiddenErrorf("configuration network %q is in use", n.Name()) | ||
} | ||
return &ActiveEndpointsError{name: n.name, id: n.id} | ||
} | ||
// Up to this point, errors that we returned were recoverable. | ||
// From here on, any errors leave us in an inconsistent state. | ||
// This is unfortunate, but there isn't a safe way to | ||
// reconstitute a load-balancer endpoint after removing it. | ||
|
||
// Mark the network for deletion | ||
n.inDelete = true | ||
|
@@ -1023,9 +1077,6 @@ func (n *network) delete(force bool) error { | |
// Cleanup the service discovery for this network | ||
c.cleanupServiceDiscovery(n.ID()) | ||
|
||
// Cleanup the load balancer | ||
c.cleanupServiceBindings(n.ID()) | ||
|
||
removeFromStore: | ||
// deleteFromStore performs an atomic delete operation and the | ||
// network.epCnt will help prevent any possible | ||
|
@@ -1877,6 +1928,10 @@ func (n *network) hasSpecialDriver() bool { | |
return n.Type() == "host" || n.Type() == "null" | ||
} | ||
|
||
func (n *network) hasLoadBalancerEndpoint() bool { | ||
return len(n.loadBalancerIP) != 0 | ||
} | ||
|
||
func (n *network) ResolveName(req string, ipType int) ([]net.IP, bool) { | ||
var ipv6Miss bool | ||
|
||
|
@@ -2056,8 +2111,20 @@ func (c *controller) getConfigNetwork(name string) (*network, error) { | |
return n.(*network), nil | ||
} | ||
|
||
func (n *network) createLoadBalancerSandbox() error { | ||
sandboxName := n.name + "-sbox" | ||
func (n *network) lbSandboxName() string { | ||
name := "lb-" + n.name | ||
if n.ingress { | ||
name = n.name + "-sbox" | ||
} | ||
return name | ||
} | ||
|
||
func (n *network) lbEndpointName() string { | ||
return n.name + "-endpoint" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for the default gw we put a restriction on the max string length, do we need something similar for the lb endpoints or sandbox names? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For some reason we restrict the length of the names for gateway sandboxes to 12 characters plus the prefix "gateway_". I'm not sure why we do this for sandboxes only but there doesn't seem to be such a restriction for endpoints, so this should be safe. |
||
} | ||
|
||
func (n *network) createLoadBalancerSandbox() (retErr error) { | ||
sandboxName := n.lbSandboxName() | ||
sbOptions := []SandboxOption{} | ||
if n.ingress { | ||
sbOptions = append(sbOptions, OptionIngress()) | ||
|
@@ -2067,44 +2134,49 @@ func (n *network) createLoadBalancerSandbox() error { | |
return err | ||
} | ||
defer func() { | ||
if err != nil { | ||
if retErr != nil { | ||
if e := n.ctrlr.SandboxDestroy(sandboxName); e != nil { | ||
logrus.Warnf("could not delete sandbox %s on failure on failure (%v): %v", sandboxName, err, e) | ||
logrus.Warnf("could not delete sandbox %s on failure on failure (%v): %v", sandboxName, retErr, e) | ||
} | ||
} | ||
}() | ||
|
||
endpointName := n.name + "-endpoint" | ||
endpointName := n.lbEndpointName() | ||
epOptions := []EndpointOption{ | ||
CreateOptionIpam(n.loadBalancerIP, nil, nil, nil), | ||
CreateOptionLoadBalancer(), | ||
} | ||
if n.hasLoadBalancerEndpoint() && !n.ingress { | ||
// Mark LB endpoints as anonymous so they don't show up in DNS | ||
epOptions = append(epOptions, CreateOptionAnonymous()) | ||
} | ||
ep, err := n.createEndpoint(endpointName, epOptions...) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
if err != nil { | ||
return err | ||
} | ||
defer func() { | ||
if err != nil { | ||
if retErr != nil { | ||
if e := ep.Delete(true); e != nil { | ||
logrus.Warnf("could not delete endpoint %s on failure on failure (%v): %v", endpointName, err, e) | ||
logrus.Warnf("could not delete endpoint %s on failure on failure (%v): %v", endpointName, retErr, e) | ||
} | ||
} | ||
}() | ||
|
||
if err := ep.Join(sb, nil); err != nil { | ||
return err | ||
} | ||
|
||
return sb.EnableService() | ||
} | ||
|
||
func (n *network) deleteLoadBalancerSandbox() { | ||
func (n *network) deleteLoadBalancerSandbox() error { | ||
n.Lock() | ||
c := n.ctrlr | ||
name := n.name | ||
n.Unlock() | ||
|
||
endpointName := name + "-endpoint" | ||
sandboxName := name + "-sbox" | ||
sandboxName := n.lbSandboxName() | ||
endpointName := n.lbEndpointName() | ||
|
||
endpoint, err := n.EndpointByName(endpointName) | ||
if err != nil { | ||
|
@@ -2129,6 +2201,7 @@ func (n *network) deleteLoadBalancerSandbox() { | |
} | ||
|
||
if err := c.SandboxDestroy(sandboxName); err != nil { | ||
logrus.Warnf("Failed to delete %s sandbox: %v", sandboxName, err) | ||
return fmt.Errorf("Failed to delete %s sandbox: %v", sandboxName, err) | ||
} | ||
return nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure why do you have emptyCount variable. Anyways you are using only one place. instead , you can directly use n.hasLoadBalancerEndpoint().
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code is attempting to determine whether there are any *container& endpoints left in the network. If there are, then it can not delete the network. The code does this by querying
n.getEpCnt().EndpointCnt()
to get the current number of endpoints connected to the network. If the network has a load balancer endpoint, then a count of 1 indicates that there are no container endpoints in the network. If there is no load balancer endpoint, then only a count of 0 indicates that there are no more container endpoints.