Skip to content

Commit

Permalink
fix(routes): deleting wrong routes when other server has same private IP
Browse files Browse the repository at this point in the history
In case another server in the project has the same Private IP (in
another network) as one of our cluster nodes, hccm would happily
delete&recreate the route on every route reconciliation.

This fixes the bug by only adding the Private IPs in the correct network
to the AllServersCache ByPrivateIP lookup map.

Fixes #470

Co-authored-by: Jonas Lammler <ljonas@riseup.net>
  • Loading branch information
apricote and jooola committed Jul 18, 2023
1 parent 5a066a1 commit e0692e0
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 3 deletions.
11 changes: 8 additions & 3 deletions hcloud/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,14 @@ func newRoutes(client *hcloud.Client, networkID int64) (*routes, error) {
}

return &routes{
client: client,
network: networkObj,
serverCache: &hcops.AllServersCache{LoadFunc: client.Server.All},
client: client,
network: networkObj,
serverCache: &hcops.AllServersCache{
// client.Server.All will load ALL the servers in the project, even those
// that are not part of the Kubernetes cluster.
LoadFunc: client.Server.All,
Network: networkObj,
},
}, nil
}

Expand Down
13 changes: 13 additions & 0 deletions internal/hcops/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/hetznercloud/hcloud-cloud-controller-manager/internal/metrics"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
"k8s.io/klog/v2"
)

// AllServersCache caches the result of the LoadFunc and provides random access
Expand All @@ -21,6 +22,9 @@ type AllServersCache struct {
LoadTimeout time.Duration
MaxAge time.Duration

// If set, only IPs in this network will be considered for [ByPrivateIP]
Network *hcloud.Network

lastRefresh time.Time
byPrivIP map[string]*hcloud.Server
byName map[string]*hcloud.Server
Expand Down Expand Up @@ -102,9 +106,18 @@ func (c *AllServersCache) getCache(getSrv func() (*hcloud.Server, bool)) (*hclou
for _, srv := range srvs {
// Index servers by the IPs of their private networks
for _, n := range srv.PrivateNet {
if c.Network != nil {
if n.Network.ID != c.Network.ID {
// Only consider private IPs in the right network
continue
}
}
if n.IP == nil {
continue
}
if _, ok := c.byPrivIP[n.IP.String()]; ok {
klog.Warningf("Overriding AllServersCache entry for private ip %s with server %s\n", n.IP.String(), srv.Name)
}
c.byPrivIP[n.IP.String()] = srv
}

Expand Down
46 changes: 46 additions & 0 deletions internal/hcops/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,52 @@ func TestAllServersCache_ClientError(t *testing.T) {
runAllServersCacheTests(t, "Not found", tmpl, cacheOps)
}

func TestAllServersCache_DuplicatePrivateIP(t *testing.T) {
// Regression test for https://github.com/hetznercloud/hcloud-cloud-controller-manager/issues/470

network := &hcloud.Network{
ID: 12345,
Name: "cluster-network",
}
srv := &hcloud.Server{
ID: 101010,
Name: "cluster-node",
PrivateNet: []hcloud.ServerPrivateNet{
{
IP: net.ParseIP("10.0.0.4"),
Network: network,
},
},
}
srvInvalid := &hcloud.Server{
ID: 101012,
Name: "invalid-node",
PrivateNet: []hcloud.ServerPrivateNet{
{
IP: net.ParseIP("10.0.0.4"),
Network: &hcloud.Network{
ID: 54321,
Name: "invalid-network",
},
},
},
}

cacheOps := newAllServersCacheOps(t, srv)
tmpl := allServersCacheTestCase{
SetUp: func(t *testing.T, tt *allServersCacheTestCase) {
tt.Cache.Network = network

tt.ServerClient.
On("All", mock.Anything).
Return([]*hcloud.Server{srv, srvInvalid}, nil)
},
Expected: srv,
}

runAllServersCacheTests(t, "DuplicatePrivateIP", tmpl, cacheOps)
}

type allServersCacheOp func(c *hcops.AllServersCache) (*hcloud.Server, error)

func newAllServersCacheOps(t *testing.T, srv *hcloud.Server) map[string]allServersCacheOp {
Expand Down

0 comments on commit e0692e0

Please sign in to comment.