Skip to content

Commit

Permalink
Add endpoint load-balancing mode
Browse files Browse the repository at this point in the history
This is the heart of the scalability change for services in libnetwork.
The present routing mesh adds load-balancing rules for a network to
every container connected to the network.  This newer approach creates a
load-balancing endpoint per network per node.  For every service on a
network, libnetwork assigns the VIP of the service to the endpoint's
interface as an alias.  This endpoint must have a unique IP address in
order to route return traffic to it.  Traffic destined for a service's
VIP arrives at the load-balancing endpoint on the VIP and from there,
Linux load balances it among backend destinations while SNATing said
traffic to the endpoint's unique IP address.

The net result of this scheme is that each node in a swarm need only
have one set of load balancing state per service instead of one per
container on the node.  This scheme is very similar to how services
currently operate on Windows nodes in libnetwork.  As with Windows
nodes, it costs extra IP addresses in a network (one per node) and an
extra network hop in the stack, although that hop always stays within
the stack local to the container.

In order to prevent existing deployments from suddenly failing if they
failed to allocate sufficient address space to include per-node
load-balancing endpoint IP addresses, this patch preserves the existing
functionality and activates the new functionality on a per-network
basis depending on whether the network has a load-balancing endpoint.
Eventually, moby should always set this option when creating new
networks and should only omit it for networks created as part of a swarm
that are not marked to use endpoint load balancing.

This patch also normalizes the code to treat "load" and "balancer"
as two separate words for the purposes of variable/function naming.
This means that the 'b' in "balancer" must be capitalized in such
names (e.g. populateLoadBalancers rather than populateLoadbalancers).

Signed-off-by: Chris Telfer <ctelfer@docker.com>
  • Loading branch information
ctelfer committed May 10, 2018
1 parent e27dcbf commit b58e5e9
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 136 deletions.
2 changes: 1 addition & 1 deletion controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -871,7 +871,7 @@ addToStore:
}
}()

if len(network.loadBalancerIP) != 0 {
if network.hasLoadBalancerEndpoint() {
if err = network.createLoadBalancerSandbox(); err != nil {
return nil, err
}
Expand Down
20 changes: 12 additions & 8 deletions network.go
Original file line number Diff line number Diff line change
Expand Up @@ -981,8 +981,8 @@ func (n *network) delete(force bool, delIngress bool) error {
}

// Check that the network is empty
var emptyCount uint64 = 0
if len(n.loadBalancerIP) != 0 {
var emptyCount uint64
if n.hasLoadBalancerEndpoint() {
emptyCount = 1
}
if !force && n.getEpCnt().EndpointCnt() > emptyCount {
Expand All @@ -992,7 +992,7 @@ func (n *network) delete(force bool, delIngress bool) error {
return &ActiveEndpointsError{name: n.name, id: n.id}
}

if len(n.loadBalancerIP) != 0 {
if n.hasLoadBalancerEndpoint() {
// If we got to this point, then the following must hold:
// * force is true, OR
// * endpoint count == 1 AND:
Expand Down Expand Up @@ -1065,9 +1065,6 @@ func (n *network) delete(force bool, delIngress bool) error {
// Cleanup the service discovery for this network
c.cleanupServiceDiscovery(n.ID())

// Cleanup the load balancer
c.cleanupServiceBindings(n.ID())

removeFromStore:
// deleteFromStore performs an atomic delete operation and the
// network.epCnt will help prevent any possible
Expand Down Expand Up @@ -1918,6 +1915,10 @@ func (n *network) hasSpecialDriver() bool {
return n.Type() == "host" || n.Type() == "null"
}

// hasLoadBalancerEndpoint reports whether a load-balancer IP address is
// set on the network, i.e. whether n.loadBalancerIP is non-empty.
func (n *network) hasLoadBalancerEndpoint() bool {
return len(n.loadBalancerIP) != 0
}

func (n *network) ResolveName(req string, ipType int) ([]net.IP, bool) {
var ipv6Miss bool

Expand Down Expand Up @@ -2098,9 +2099,9 @@ func (c *controller) getConfigNetwork(name string) (*network, error) {
}

func (n *network) lbSandboxName() string {
name := n.name + "-sbox"
name := "lb-" + n.name
if n.ingress {
name = "lb-" + n.name
name = n.name + "-sbox"
}
return name
}
Expand Down Expand Up @@ -2133,6 +2134,9 @@ func (n *network) createLoadBalancerSandbox() error {
CreateOptionIpam(n.loadBalancerIP, nil, nil, nil),
CreateOptionLoadBalancer(),
}
if n.hasLoadBalancerEndpoint() && !n.ingress {
epOptions = append(epOptions, CreateOptionAnonymous())
}
ep, err := n.createEndpoint(endpointName, epOptions...)
if err != nil {
return err
Expand Down
18 changes: 1 addition & 17 deletions sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -726,16 +726,8 @@ func releaseOSSboxResources(osSbox osl.Sandbox, ep *endpoint) {

ep.Lock()
joinInfo := ep.joinInfo
vip := ep.virtualIP
ep.Unlock()

if len(vip) != 0 {
loopName := osSbox.GetLoopbackIfaceName()
if err := osSbox.RemoveAliasIP(loopName, &net.IPNet{IP: vip, Mask: net.CIDRMask(32, 32)}); err != nil {
logrus.Warnf("Remove virtual IP %v failed: %v", vip, err)
}
}

if joinInfo == nil {
return
}
Expand Down Expand Up @@ -848,14 +840,6 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
}
}

if len(ep.virtualIP) != 0 {
loopName := sb.osSbox.GetLoopbackIfaceName()
err := sb.osSbox.AddAliasIP(loopName, &net.IPNet{IP: ep.virtualIP, Mask: net.CIDRMask(32, 32)})
if err != nil {
return fmt.Errorf("failed to add virtual IP %v: %v", ep.virtualIP, err)
}
}

if joinInfo != nil {
// Set up non-interface routes.
for _, r := range joinInfo.StaticRoutes {
Expand All @@ -881,7 +865,7 @@ func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
// information including gateway and other routes so that
// loadbalancers are populated all the network state is in
// place in the sandbox.
sb.populateLoadbalancers(ep)
sb.populateLoadBalancers(ep)

// Only update the store if we did not come here as part of
// sandbox delete. If we came here as part of delete then do
Expand Down
24 changes: 24 additions & 0 deletions service_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,18 @@ func (c *controller) addServiceBinding(svcName, svcID, nID, eID, containerName s
return err
}

// We must lock the network first in order to prevent adding
// endpoints concurrently to a network still being created or
// deleted. This must occur before locking the service because
// other operations such as adding endpoints to sandboxes can
// enumerate the services, which also requires locking them.
// Taking the locks in the opposite order can lead to deadlock,
// where a network create has a network lock and seeks a service
// lock (to add a gateway or LB sandbox) while a service binding
// operation has a service lock and tries to acquire a network lock.
c.networkLocker.Lock(nID)
defer c.networkLocker.Unlock(nID)

skey := serviceKey{
id: svcID,
ports: portConfigs(ingressPorts).String(),
Expand Down Expand Up @@ -310,6 +322,18 @@ func (c *controller) rmServiceBinding(svcName, svcID, nID, eID, containerName st
return err
}

// We must lock the network first in order to prevent adding
// endpoints concurrently to a network still being created or
// deleted. This must occur before locking the service because
// other operations such as adding endpoints to sandboxes can
// enumerate the services, which also requires locking them.
// Taking the locks in the opposite order can lead to deadlock,
// where a network create has a network lock and seeks a service
// lock (to add a gateway or LB sandbox) while a service binding
// operation has a service lock and tries to acquire a network lock.
c.networkLocker.Lock(nID)
defer c.networkLocker.Unlock(nID)

skey := serviceKey{
id: svcID,
ports: portConfigs(ingressPorts).String(),
Expand Down
Loading

0 comments on commit b58e5e9

Please sign in to comment.