From 75c0539be9f783bc9724e250b1d1d722fc811d45 Mon Sep 17 00:00:00 2001 From: Dmitriy Matrenichev Date: Mon, 20 May 2024 14:23:39 +0300 Subject: [PATCH] fix: correctly handle dns messages in our dns implementation - By default, github.com/miekg/dns uses `dns.MinMsgSize` for UDP messages, which is 512 bytes. This is too small for some DNS request/responses, and can cause truncation and errors. This change sets the buffer size to `dns.DefaultMsgSize` 4096 bytes, which is the maximum size of a dns packet payload per RFC 6891. - Another thing we do is increasing CoreDNS dns request payload limit to 4096 from the default 1232 bytes. - We also retry the request if the response is truncated or previous connection was closed. - And finally we properly handle the case where the response is larger than the client buffer size, and we return a truncated correct response. Closes #8763 Signed-off-by: Dmitriy Matrenichev --- .../k8s/templates/core-dns-template.yaml | 1 + .../controllers/network/dns_resolve_cache.go | 12 +++++------ internal/pkg/dns/dns.go | 21 +++++++++++++++---- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/internal/app/machined/pkg/controllers/k8s/templates/core-dns-template.yaml b/internal/app/machined/pkg/controllers/k8s/templates/core-dns-template.yaml index 6daa01d25a..26899b8a92 100644 --- a/internal/app/machined/pkg/controllers/k8s/templates/core-dns-template.yaml +++ b/internal/app/machined/pkg/controllers/k8s/templates/core-dns-template.yaml @@ -71,6 +71,7 @@ data: loop reload loadbalance + bufsize 4096 } --- apiVersion: apps/v1 diff --git a/internal/app/machined/pkg/controllers/network/dns_resolve_cache.go b/internal/app/machined/pkg/controllers/network/dns_resolve_cache.go index 763b4007ab..d93ccaaf48 100644 --- a/internal/app/machined/pkg/controllers/network/dns_resolve_cache.go +++ b/internal/app/machined/pkg/controllers/network/dns_resolve_cache.go @@ -37,7 +37,7 @@ type DNSResolveCacheController struct { mx sync.Mutex handler *dns.Handler nodeHandler *dns.NodeHandler - cache *dns.Cache + rootHandler dnssrv.Handler runners map[runnerConfig]pair.Pair[func(), <-chan struct{}] reconcile chan struct{} originalCtx context.Context //nolint:containedctx @@ -130,7 +130,7 @@ func (ctrl *DNSResolveCacheController) Run(ctx context.Context, r controller.Run runnerCfg := runnerConfig{net: netwk, addr: addr} if _, ok := ctrl.runners[runnerCfg]; !ok { - runner, rErr := newDNSRunner(runnerCfg, ctrl.cache, ctrl.Logger, cfg.TypedSpec().ServiceHostDNSAddress.IsValid()) + runner, rErr := newDNSRunner(runnerCfg, ctrl.rootHandler, ctrl.Logger, cfg.TypedSpec().ServiceHostDNSAddress.IsValid()) if rErr != nil { return fmt.Errorf("error creating dns runner: %w", rErr) } @@ -200,7 +200,7 @@ func (ctrl *DNSResolveCacheController) init(ctx context.Context) { ctrl.originalCtx = ctx ctrl.handler = dns.NewHandler(ctrl.Logger) ctrl.nodeHandler = dns.NewNodeHandler(ctrl.handler, &stateMapper{state: ctrl.State}, ctrl.Logger) - ctrl.cache = dns.NewCache(ctrl.nodeHandler, ctrl.Logger) + ctrl.rootHandler = dns.NewCache(ctrl.nodeHandler, ctrl.Logger) ctrl.runners = map[runnerConfig]pair.Pair[func(), <-chan struct{}]{} ctrl.reconcile = make(chan struct{}, 1) @@ -256,7 +256,7 @@ type runnerConfig struct { addr netip.AddrPort } -func newDNSRunner(cfg runnerConfig, cache *dns.Cache, logger *zap.Logger, forwardEnabled bool) (*dns.Server, error) { +func newDNSRunner(cfg runnerConfig, rootHandler dnssrv.Handler, logger *zap.Logger, forwardEnabled bool) (*dns.Server, error) { if cfg.addr.Addr().Is6() { cfg.net += "6" } @@ -279,7 +279,7 @@ func newDNSRunner(cfg runnerConfig, cache *dns.Cache, logger *zap.Logger, forwar serverOpts = dns.ServerOptions{ PacketConn: packetConn, - Handler: cache, + Handler: rootHandler, Logger: logger, } @@ -291,7 +291,7 @@ func newDNSRunner(cfg runnerConfig, cache *dns.Cache, logger *zap.Logger, forwar serverOpts = dns.ServerOptions{ Listener: listener, - Handler: cache, + Handler: rootHandler, ReadTimeout: 3 * time.Second, WriteTimeout: 5 * time.Second, IdleTimeout: func() time.Duration { return 10 * time.Second }, diff --git a/internal/pkg/dns/dns.go b/internal/pkg/dns/dns.go index d2cc5bfd28..0d99350759 100644 --- a/internal/pkg/dns/dns.go +++ b/internal/pkg/dns/dns.go @@ -45,7 +45,7 @@ func NewCache(next plugin.Handler, l *zap.Logger) *Cache { // ServeDNS implements [dns.Handler]. func (c *Cache) ServeDNS(wr dns.ResponseWriter, msg *dns.Msg) { - _, err := c.cache.ServeDNS(context.Background(), wr, msg) + _, err := c.cache.ServeDNS(context.Background(), request.NewScrubWriter(msg, wr), msg) if err != nil { // we should probably call newProxy.Healthcheck() if there are too many errors c.logger.Warn("error serving dns request", zap.Error(err)) @@ -102,9 +102,21 @@ func (h *Handler) ServeDNS(ctx context.Context, wrt dns.ResponseWriter, msg *dns ) for _, ups := range upstreams { - resp, err = ups.Connect(ctx, req, proxy.Options{}) - if errors.Is(err, proxy.ErrCachedClosed) { // Remote side closed conn, can only happen with TCP. - continue + opts := proxy.Options{} + + for { + resp, err = ups.Connect(ctx, req, opts) + + switch { + case errors.Is(err, proxy.ErrCachedClosed): // Remote side closed conn, can only happen with TCP. + continue + case resp != nil && resp.Truncated && !opts.ForceTCP: // Retry with TCP if truncated + opts.ForceTCP = true + + continue + } + + break } if err == nil { @@ -274,6 +286,7 @@ func NewServer(opts ServerOptions) *Server { Listener: opts.Listener, PacketConn: opts.PacketConn, Handler: opts.Handler, + UDPSize: dns.DefaultMsgSize, // 4096 since default is [dns.MinMsgSize] = 512 bytes, which is too small. ReadTimeout: opts.ReadTimeout, WriteTimeout: opts.WriteTimeout, IdleTimeout: opts.IdleTimeout,