Skip to content

Commit

Permalink
draft: dns forwarding for multi-cluster
Browse files Browse the repository at this point in the history
  • Loading branch information
tinyzimmer committed Oct 15, 2023
1 parent bac8755 commit f2c125f
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 22 deletions.
3 changes: 3 additions & 0 deletions api/v1/remotenetwork_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ type NetworkConfig struct {
// MTU is the MTU to use for the interface. It will be automatically
// chosen if not provided.
MTU int `json:"mtu,omitempty"`
// ForwardDNS enables forwarding DNS requests from the remote network.
// Requires that a MeshDNS server is running on each peered CNI node.
ForwardDNS bool `json:"forwardDNS,omitempty"`
// DisableIPv4 disables IPv4 forwarding on the interface.
DisableIPv4 bool `json:"disableIPv4,omitempty"`
// DisableIPv6 disables IPv6 forwarding on the interface.
Expand Down
5 changes: 5 additions & 0 deletions deploy/crds/cni.webmesh.io_remotenetworks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,11 @@ spec:
disableIPv6:
description: DisableIPv6 disables IPv6 forwarding on the interface.
type: boolean
forwardDNS:
description: ForwardDNS enables forwarding DNS requests from the
remote network. Requires that a MeshDNS server is running on
each peered CNI node.
type: boolean
interfaceName:
description: InterfaceName is the name to give the interface.
It will be automatically chosen from the name of the remote
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ require (
github.com/vishvananda/netlink v1.2.1-beta.2
github.com/webmeshproj/api v0.9.0
github.com/webmeshproj/storage-provider-k8s v0.2.11
github.com/webmeshproj/webmesh v0.14.1
github.com/webmeshproj/webmesh v0.14.2-0.20231015212124-8bcc97648e95
gopkg.in/yaml.v3 v3.0.1
k8s.io/api v0.28.2
k8s.io/apiextensions-apiserver v0.28.0
Expand Down
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,12 @@ github.com/webmeshproj/storage-provider-k8s v0.2.11 h1:Ed9iulfiXOZA0qetlsEo1vpq8
github.com/webmeshproj/storage-provider-k8s v0.2.11/go.mod h1:KHY6PPkopiZpqYRc4UynUPy+ZOt0W6TbcsFyrAN8q5o=
github.com/webmeshproj/webmesh v0.14.1 h1:lQeGbFe+z9vBGWgvsgusdAwyNW2FdOMS+QjNgg9nuXQ=
github.com/webmeshproj/webmesh v0.14.1/go.mod h1:NyhtaJLkSNNfQ361wER1aLD+oGh6Fb5nj8dsXBi2P+0=
github.com/webmeshproj/webmesh v0.14.2-0.20231015204948-339a2b807d23 h1:Kfe/+acWGgaDlaBs/U99D7ArvwahSdi73lL0hyFzvUQ=
github.com/webmeshproj/webmesh v0.14.2-0.20231015204948-339a2b807d23/go.mod h1:NyhtaJLkSNNfQ361wER1aLD+oGh6Fb5nj8dsXBi2P+0=
github.com/webmeshproj/webmesh v0.14.2-0.20231015205710-00dc9371417d h1:ofNMRLP+DHQqPnn2bCOQWrdRNHg8kAoyJU5P6v+AnyY=
github.com/webmeshproj/webmesh v0.14.2-0.20231015205710-00dc9371417d/go.mod h1:NyhtaJLkSNNfQ361wER1aLD+oGh6Fb5nj8dsXBi2P+0=
github.com/webmeshproj/webmesh v0.14.2-0.20231015212124-8bcc97648e95 h1:tbibwtVzIcFfo1kMxgHfSL9mrjBeO/lrM4BH8oTx7GM=
github.com/webmeshproj/webmesh v0.14.2-0.20231015212124-8bcc97648e95/go.mod h1:NyhtaJLkSNNfQ361wER1aLD+oGh6Fb5nj8dsXBi2P+0=
github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 h1:EKhdznlJHPMoKr0XTrX+IlJs1LH3lyx2nfr1dOlZ79k=
github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1/go.mod h1:8UvriyWtv5Q5EOgjHaSseUEdkQfvwFv1I/In/O2M9gc=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
Expand Down
23 changes: 23 additions & 0 deletions internal/cmd/node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
meshcontext "github.com/webmeshproj/webmesh/pkg/context"
"github.com/webmeshproj/webmesh/pkg/plugins/builtins"
meshservices "github.com/webmeshproj/webmesh/pkg/services"
"github.com/webmeshproj/webmesh/pkg/services/meshdns"
"github.com/webmeshproj/webmesh/pkg/version"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
Expand Down Expand Up @@ -341,6 +342,11 @@ func Main(build version.BuildInfo) {
}

hostCtx := meshcontext.WithLogger(context.Background(), host.NodeLogger())
if cniopts.Host.Services.MeshDNS.Enabled {
// We force subscribe forwarders to true or otherwise it would
// serve very little purpose.
cniopts.Host.Services.MeshDNS.SubscribeForwarders = true
}
srvOpts, err := cniopts.Host.Services.NewServiceOptions(hostCtx, host.Node())
if err != nil {
err := host.Stop(ctx)
Expand All @@ -350,6 +356,23 @@ func Main(build version.BuildInfo) {
log.Error(err, "Failed to create webmesh service options")
os.Exit(1)
}
if cniopts.Host.Services.MeshDNS.Enabled {
// Set the DNS server to the remote network controller
dnssrv, ok := srvOpts.GetServer(&meshdns.Server{})
if !ok {
// Something bizarre happened.
err := host.Stop(ctx)
if err != nil {
log.Error(err, "Failed to stop host node")
}
log.Error(err, "Failed to get meshdns server")
os.Exit(1)
}
remoteNetworkReconciler.SetDNSServer(dnssrv.(*meshdns.Server))
// TODO: We should technically tell the local containers
// to use this DNS server to. That way they will receive
// forward DNS requests from any remote networks.
}
srv, err := meshservices.NewServer(hostCtx, srvOpts)
if err != nil {
err := host.Stop(ctx)
Expand Down
66 changes: 45 additions & 21 deletions internal/controllers/remotenetwork_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,16 @@ import (
"time"

v1 "github.com/webmeshproj/api/v1"
"github.com/webmeshproj/storage-provider-k8s/provider"
"github.com/webmeshproj/webmesh/pkg/logging"
"github.com/webmeshproj/webmesh/pkg/meshnet"
"github.com/webmeshproj/webmesh/pkg/meshnet/endpoints"
storageprovider "github.com/webmeshproj/storage-provider-k8s/provider"
meshlogging "github.com/webmeshproj/webmesh/pkg/logging"
meshnet "github.com/webmeshproj/webmesh/pkg/meshnet"
mesheps "github.com/webmeshproj/webmesh/pkg/meshnet/endpoints"
netutil "github.com/webmeshproj/webmesh/pkg/meshnet/netutil"
meshsys "github.com/webmeshproj/webmesh/pkg/meshnet/system"
meshtransport "github.com/webmeshproj/webmesh/pkg/meshnet/transport"
meshnode "github.com/webmeshproj/webmesh/pkg/meshnode"
meshplugins "github.com/webmeshproj/webmesh/pkg/plugins"
meshdns "github.com/webmeshproj/webmesh/pkg/services/meshdns"
meshtypes "github.com/webmeshproj/webmesh/pkg/storage/types"
corev1 "k8s.io/api/core/v1"
ctrl "sigs.k8s.io/controller-runtime"
Expand All @@ -55,9 +56,10 @@ import (
type RemoteNetworkReconciler struct {
client.Client
config.Config
Provider *provider.Provider
Provider *storageprovider.Provider
HostNode host.Node
bridges map[client.ObjectKey]meshnode.Node
dnssrv *meshdns.Server
mu sync.Mutex
}

Expand All @@ -68,6 +70,13 @@ func (r *RemoteNetworkReconciler) SetupWithManager(mgr ctrl.Manager) (err error)
Complete(r)
}

// SetDNSServer sets the DNS server for the controller.
func (r *RemoteNetworkReconciler) SetDNSServer(srv *meshdns.Server) {
r.mu.Lock()
defer r.mu.Unlock()
r.dnssrv = srv
}

func (r *RemoteNetworkReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
r.mu.Lock()
defer r.mu.Unlock()
Expand Down Expand Up @@ -107,7 +116,6 @@ func (r *RemoteNetworkReconciler) Reconcile(ctx context.Context, req ctrl.Reques
log.Error(err, "Failed to reconcile remote network bridge")
return ctrl.Result{}, err
}

if nw.Spec.AuthMethod != cniv1.RemoteAuthMethodKubernetes {
// Request a requeue in a minute to ensure the bridge is still running
// and all node edges are up to date.
Expand Down Expand Up @@ -167,7 +175,7 @@ func (r *RemoteNetworkReconciler) reconcileNetwork(ctx context.Context, key clie
// So our node ID needs to match the key.
nodeID = privkey.ID()
}
node := NewNode(logging.NewLogger(r.Host.LogLevel, "json"), meshnode.Config{
node := NewNode(meshlogging.NewLogger(r.Host.LogLevel, "json"), meshnode.Config{
Key: privkey,
NodeID: nodeID,
ZoneAwarenessID: r.Host.NodeID,
Expand Down Expand Up @@ -221,7 +229,7 @@ func (r *RemoteNetworkReconciler) reconcileNetwork(ctx context.Context, key clie
select {
case <-bridge.Ready():
hwaddr, _ := bridge.Network().WireGuard().HardwareAddr()
log.Info("Webmesh node for bridge is running",
log.Info("Webmesh node for remote network bridge is running",
"interfaceName", bridge.Network().WireGuard().Name(),
"macAddress", hwaddr.String(),
"ipv4Address", validOrNone(bridge.Network().WireGuard().AddressV4()),
Expand All @@ -241,6 +249,7 @@ func (r *RemoteNetworkReconciler) reconcileNetwork(ctx context.Context, key clie
return ctx.Err()
}

// Make sure we route traffic to the remote network
log.Info("Ensuring local routes to remote network")
err := r.Provider.MeshDB().Networking().PutRoute(ctx, meshtypes.Route{
Route: &v1.Route{
Expand Down Expand Up @@ -272,7 +281,7 @@ func (r *RemoteNetworkReconciler) connectWithWebmeshAPI(ctx context.Context, nw
func (r *RemoteNetworkReconciler) connectWithKubeconfig(ctx context.Context, nw *cniv1.RemoteNetwork, kubeconfig []byte, bridge meshnode.Node) error {
log := log.FromContext(ctx)
// Detect the current endpoints on the machine.
eps, err := endpoints.Detect(ctx, endpoints.DetectOpts{
eps, err := mesheps.Detect(ctx, mesheps.DetectOpts{
DetectPrivate: true, // TODO: Not necessarily required in this case.
DetectIPv6: !nw.Spec.Network.DisableIPv6,
AllowRemoteDetection: r.Manager.RemoteEndpointDetection,
Expand All @@ -289,6 +298,25 @@ func (r *RemoteNetworkReconciler) connectWithKubeconfig(ctx context.Context, nw
if err != nil {
return fmt.Errorf("failed to detect endpoints: %w", err)
}
encodedPubkey, err := bridge.Key().PublicKey().Encode()
if err != nil {
return fmt.Errorf("failed to encode public key: %w", err)
}
var bridgeFeatures []*v1.FeaturePort
if nw.Spec.Network.ForwardDNS {
if r.dnssrv == nil {
// Requeue until the DNS server is set.
return fmt.Errorf("no dns server running yet")
}
bridgeFeatures = append(bridgeFeatures, &v1.FeaturePort{
Feature: v1.Feature_MESH_DNS,
Port: int32(r.dnssrv.ListenPort()),
})
bridgeFeatures = append(bridgeFeatures, &v1.FeaturePort{
Feature: v1.Feature_FORWARD_MESH_DNS,
Port: int32(r.dnssrv.ListenPort()),
})
}
// Create a connection to the remote cluster storage
cfg, err := types.NewRestConfigFromBytes(kubeconfig)
if err != nil {
Expand All @@ -300,7 +328,7 @@ func (r *RemoteNetworkReconciler) connectWithKubeconfig(ctx context.Context, nw
}
return r.Host.Namespace
}()
db, err := provider.NewObserverWithConfig(cfg, provider.Options{
db, err := storageprovider.NewObserverWithConfig(cfg, storageprovider.Options{
NodeID: bridge.ID().String(),
Namespace: namespace,
})
Expand All @@ -320,7 +348,7 @@ func (r *RemoteNetworkReconciler) connectWithKubeconfig(ctx context.Context, nw
// Retrieve the state of the remote network.
remoteState, err := db.MeshDB().MeshState().GetMeshState(ctx)
if err != nil {
return fmt.Errorf("failed to get remote mesh state: %w", err)
return handleErr(fmt.Errorf("failed to get remote mesh state: %w", err))
}
// Create a peer for ourselves on the remote network.
var ipv4addr string
Expand Down Expand Up @@ -350,14 +378,10 @@ func (r *RemoteNetworkReconciler) connectWithKubeconfig(ctx context.Context, nw
}
ipv4addr = alloc.String()
}
encoded, err := bridge.Key().PublicKey().Encode()
if err != nil {
return handleErr(fmt.Errorf("failed to encode public key: %w", err))
}
peer := meshtypes.MeshNode{
MeshNode: &v1.MeshNode{
Id: bridge.ID().String(),
PublicKey: encoded,
PublicKey: encodedPubkey,
ZoneAwarenessID: r.Host.NodeID,
PrivateIPv4: ipv4addr,
PrivateIPv6: func() string {
Expand All @@ -366,6 +390,7 @@ func (r *RemoteNetworkReconciler) connectWithKubeconfig(ctx context.Context, nw
}
return netutil.AssignToPrefix(remoteState.NetworkV6(), bridge.Key().PublicKey()).String()
}(),
Features: bridgeFeatures,
},
}
log.Info("Registering ourselves with the remote meshdb", "peer", peer.MeshNode)
Expand Down Expand Up @@ -405,7 +430,7 @@ func (r *RemoteNetworkReconciler) connectWithKubeconfig(ctx context.Context, nw
err = db.Consensus().AddObserver(ctx, meshtypes.StoragePeer{
StoragePeer: &v1.StoragePeer{
Id: bridge.ID().String(),
PublicKey: encoded,
PublicKey: encodedPubkey,
},
})
if err != nil {
Expand Down Expand Up @@ -449,7 +474,7 @@ func (r *RemoteNetworkReconciler) connectWithKubeconfig(ctx context.Context, nw
}, false)
})
log.Info("Connecting to remote network")
err = bridge.Connect(ctx, meshnode.ConnectOptions{
err = bridge.Connect(r.HostNode.NodeContext(ctx), meshnode.ConnectOptions{
StorageProvider: db,
MaxJoinRetries: 10,
JoinRoundTripper: joinRTT,
Expand Down Expand Up @@ -495,7 +520,7 @@ func (r *RemoteNetworkReconciler) connectWithKubeconfig(ctx context.Context, nw
peer = meshtypes.MeshNode{
MeshNode: &v1.MeshNode{
Id: bridge.ID().String(),
PublicKey: encoded,
PublicKey: encodedPubkey,
PrimaryEndpoint: func() string {
if eps.FirstPublicAddr().IsValid() {
return eps.FirstPublicAddr().String()
Expand All @@ -511,8 +536,7 @@ func (r *RemoteNetworkReconciler) connectWithKubeconfig(ctx context.Context, nw
}
return netutil.AssignToPrefix(remoteState.NetworkV6(), bridge.Key().PublicKey()).String()
}(),
// TODO: Optionally advertise forward DNS.
Features: nil,
Features: bridgeFeatures,
},
}
log.Info("Updating peer with wireguard endpoints", "peer", peer.MeshNode)
Expand Down
12 changes: 12 additions & 0 deletions internal/host/hostnode.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"sync/atomic"

v1 "github.com/webmeshproj/api/v1"
meshcontext "github.com/webmeshproj/webmesh/pkg/context"
"github.com/webmeshproj/webmesh/pkg/logging"
"github.com/webmeshproj/webmesh/pkg/meshnet"
endpoints "github.com/webmeshproj/webmesh/pkg/meshnet/endpoints"
Expand Down Expand Up @@ -66,6 +67,8 @@ type Node interface {
Node() meshnode.Node
// NodeLogger returns the node's logger.
NodeLogger() *slog.Logger
// NodeContext returns a context with the node's logger.
NodeContext(context.Context) context.Context
}

// NewNode is the function for creating a new mesh node. Declared as a variable for testing purposes.
Expand Down Expand Up @@ -121,6 +124,15 @@ func (h *hostNode) NodeLogger() *slog.Logger {
return h.nodeLog
}

// NodeContext returns a context with the node's logger. If context is
// nil, the background context is used.
func (h *hostNode) NodeContext(ctx context.Context) context.Context {
if ctx != nil {
return meshcontext.WithLogger(ctx, h.nodeLog)
}
return meshcontext.WithLogger(context.Background(), h.nodeLog)
}

// Start starts the host node.
func (h *hostNode) Start(ctx context.Context, cfg *rest.Config) error {
h.mu.Lock()
Expand Down

0 comments on commit f2c125f

Please sign in to comment.