From be39fb20cc51caf0d739fb5514254c979231fc3e Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Wed, 7 Mar 2018 10:01:12 +0100
Subject: [PATCH 1/7] [BUGFIX] do not break when TCP DNS answer exceeds 64k
It will avoid having discovery broken when having large number
of instances of a service (works with SRV and A* records).
Fixes https://github.com/hashicorp/consul/issues/3850
---
agent/dns.go | 60 ++++++++++++++++++++++----------
agent/dns_test.go | 88 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 130 insertions(+), 18 deletions(-)
diff --git a/agent/dns.go b/agent/dns.go
index b809a2b3a318..5011fbbbf118 100644
--- a/agent/dns.go
+++ b/agent/dns.go
@@ -713,6 +713,32 @@ func syncExtra(index map[string]dns.RR, resp *dns.Msg) {
resp.Extra = extra
}
+// trimTCPResponse limit the MaximumSize of messages to 64k as it is the limit
+// of DNS responses
+func trimTCPResponse(req, resp *dns.Msg) (trimmed bool) {
+ hasExtra := len(resp.Extra) > 0
+ maxSize := 65535
+
+ // We avoid some function calls and allocations by only handling the
+ // extra data when necessary.
+ var index map[string]dns.RR
+ if hasExtra {
+ index = make(map[string]dns.RR, len(resp.Extra))
+ indexRRs(resp.Extra, index)
+ }
+ truncated := false
+
+ // This enforces the given limit on 64k, the max limit for DNS messages
+ for len(resp.Answer) > 0 && resp.Len() > maxSize {
+ truncated = true
+ resp.Answer = resp.Answer[:len(resp.Answer)-1]
+ if hasExtra {
+ syncExtra(index, resp)
+ }
+ }
+ return truncated
+}
+
// trimUDPResponse makes sure a UDP response is not longer than allowed by RFC
// 1035. Enforce an arbitrary limit that can be further ratcheted down by
// config, and then make sure the response doesn't exceed 512 bytes. Any extra
@@ -765,6 +791,20 @@ func trimUDPResponse(req, resp *dns.Msg, udpAnswerLimit int) (trimmed bool) {
return len(resp.Answer) < numAnswers
}
+// trimDNSResponse will trim the response for UDP and TCP
+func (d *DNSServer) trimDNSResponse(network string, req, resp *dns.Msg) (trimmed bool) {
+ if network != "tcp" {
+ trimmed = trimUDPResponse(req, resp, d.config.UDPAnswerLimit)
+ } else {
+ trimmed = trimTCPResponse(req, resp)
+ }
+ // Flag that there are more records to return in the UDP response
+ if trimmed && d.config.EnableTruncate {
+ resp.Truncated = true
+ }
+ return trimmed
+}
+
// lookupServiceNodes returns nodes with a given service.
func (d *DNSServer) lookupServiceNodes(datacenter, service, tag string) (structs.IndexedCheckServiceNodes, error) {
args := structs.ServiceSpecificRequest{
@@ -840,15 +880,7 @@ func (d *DNSServer) serviceLookup(network, datacenter, service, tag string, req,
d.serviceNodeRecords(datacenter, out.Nodes, req, resp, ttl)
}
- // If the network is not TCP, restrict the number of responses
- if network != "tcp" {
- wasTrimmed := trimUDPResponse(req, resp, d.config.UDPAnswerLimit)
-
- // Flag that there are more records to return in the UDP response
- if wasTrimmed && d.config.EnableTruncate {
- resp.Truncated = true
- }
- }
+ d.trimDNSResponse(network, req, resp)
// If the answer is empty and the response isn't truncated, return not found
if len(resp.Answer) == 0 && !resp.Truncated {
@@ -950,15 +982,7 @@ RPC:
d.serviceNodeRecords(out.Datacenter, out.Nodes, req, resp, ttl)
}
- // If the network is not TCP, restrict the number of responses.
- if network != "tcp" {
- wasTrimmed := trimUDPResponse(req, resp, d.config.UDPAnswerLimit)
-
- // Flag that there are more records to return in the UDP response
- if wasTrimmed && d.config.EnableTruncate {
- resp.Truncated = true
- }
- }
+ d.trimDNSResponse(network, req, resp)
// If the answer is empty and the response isn't truncated, return not found
if len(resp.Answer) == 0 && !resp.Truncated {
diff --git a/agent/dns_test.go b/agent/dns_test.go
index d42abbeb7a06..18da89439fcd 100644
--- a/agent/dns_test.go
+++ b/agent/dns_test.go
@@ -2740,6 +2740,94 @@ func TestDNS_ServiceLookup_Randomize(t *testing.T) {
}
}
+func TestDNS_TCP_and_UDP_Truncate(t *testing.T) {
+ t.Parallel()
+ a := NewTestAgent(t.Name(), `
+ dns_config {
+ enable_truncate = true
+ }
+ `)
+ defer a.Shutdown()
+
+ services := []string{"normal", "truncated"}
+ for index, service := range services {
+ numServices := (index * 5000) + 2
+ for i := 1; i < numServices; i++ {
+ args := &structs.RegisterRequest{
+ Datacenter: "dc1",
+ Node: fmt.Sprintf("%s-%d.acme.com", service, i),
+ Address: fmt.Sprintf("127.%d.%d.%d", index, (i / 255), i%255),
+ Service: &structs.NodeService{
+ Service: service,
+ Port: 8000,
+ },
+ }
+
+ var out struct{}
+ if err := a.RPC("Catalog.Register", args, &out); err != nil {
+ t.Fatalf("err: %v", err)
+ }
+ }
+
+ // Register an equivalent prepared query.
+ var id string
+ {
+ args := &structs.PreparedQueryRequest{
+ Datacenter: "dc1",
+ Op: structs.PreparedQueryCreate,
+ Query: &structs.PreparedQuery{
+ Name: service,
+ Service: structs.ServiceQuery{
+ Service: service,
+ },
+ },
+ }
+ if err := a.RPC("PreparedQuery.Apply", args, &id); err != nil {
+ t.Fatalf("err: %v", err)
+ }
+ }
+
+ // Look up the service directly and via prepared query. Ensure the
+ // response is truncated each time.
+ questions := []string{
+ fmt.Sprintf("%s.service.consul.", service),
+ id + ".query.consul.",
+ }
+ protocols := []string{
+ "tcp",
+ "udp",
+ }
+ for _, qType := range []uint16{dns.TypeANY, dns.TypeA, dns.TypeSRV} {
+ for _, question := range questions {
+ for _, protocol := range protocols {
+ t.Run(fmt.Sprintf("lookup %s %s (qType:=%d)", question, protocol, qType), func(t *testing.T) {
+ m := new(dns.Msg)
+ m.SetQuestion(question, dns.TypeANY)
+ if protocol == "udp" {
+ m.SetEdns0(8192, true)
+ }
+ c := new(dns.Client)
+ c.Net = protocol
+ in, out, err := c.Exchange(m, a.DNSAddr())
+ if err != nil && err != dns.ErrTruncated {
+ t.Fatalf("err: %v", err)
+ }
+
+ // Check for the truncate bit
+ shouldBeTruncated := numServices > 4095
+
+ if shouldBeTruncated != in.Truncated {
+ info := fmt.Sprintf("service %s question:=%s (%s) (%d total records) in %v",
+ service, question, protocol, numServices, out)
+ t.Fatalf("Should have truncate:=%v for %s", shouldBeTruncated, info)
+ }
+ })
+ }
+ }
+ }
+ }
+}
+
func TestDNS_ServiceLookup_Truncate(t *testing.T) {
t.Parallel()
a := NewTestAgent(t.Name(), `
From b77fd5ce9df2d1d46e27d91ecf9d385d21d6fa4f Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Wed, 7 Mar 2018 16:14:36 +0100
Subject: [PATCH 2/7] 64000 max limit to DNS messages since there is overhead
Added debug log to give information about truncation.
---
agent/dns.go | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/agent/dns.go b/agent/dns.go
index 5011fbbbf118..21e9e67140d0 100644
--- a/agent/dns.go
+++ b/agent/dns.go
@@ -715,9 +715,10 @@ func syncExtra(index map[string]dns.RR, resp *dns.Msg) {
// trimTCPResponse limit the MaximumSize of messages to 64k as it is the limit
// of DNS responses
-func trimTCPResponse(req, resp *dns.Msg) (trimmed bool) {
+func (d *DNSServer) trimTCPResponse(req, resp *dns.Msg) (trimmed bool) {
hasExtra := len(resp.Extra) > 0
- maxSize := 65535
+ // There is some overhead, 65535 does not work
+ maxSize := 64000
// We avoid some function calls and allocations by only handling the
// extra data when necessary.
@@ -726,6 +727,8 @@ func trimTCPResponse(req, resp *dns.Msg) (trimmed bool) {
index = make(map[string]dns.RR, len(resp.Extra))
indexRRs(resp.Extra, index)
}
+ originalSize := resp.Len()
+ originalNumRecords := len(resp.Answer)
truncated := false
// This enforces the given limit on 64k, the max limit for DNS messages
@@ -736,6 +739,12 @@ func trimTCPResponse(req, resp *dns.Msg) (trimmed bool) {
syncExtra(index, resp)
}
}
+ if truncated {
+ d.logger.Printf("[DEBUG] dns: TCP answer to %v too large truncated recs:=%d/%d, size:=%d/%d",
+ req.Question,
+ len(resp.Answer), originalNumRecords, resp.Len(), originalSize)
+
+ }
return truncated
}
@@ -796,7 +805,7 @@ func (d *DNSServer) trimDNSResponse(network string, req, resp *dns.Msg) (trimmed
if network != "tcp" {
trimmed = trimUDPResponse(req, resp, d.config.UDPAnswerLimit)
} else {
- trimmed = trimTCPResponse(req, resp)
+ trimmed = d.trimTCPResponse(req, resp)
}
// Flag that there are more records to return in the UDP response
if trimmed && d.config.EnableTruncate {
From 7d59249d96e5e2d24c423024174d27618f7ad478 Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Wed, 7 Mar 2018 23:33:41 +0100
Subject: [PATCH 3/7] Avoid issue with compression of DNS messages causing
overflow
---
agent/dns.go | 7 ++++++-
agent/dns_test.go | 47 +++++++++++++++++++++++++----------------------
2 files changed, 31 insertions(+), 23 deletions(-)
diff --git a/agent/dns.go b/agent/dns.go
index 21e9e67140d0..2750ed6b02ad 100644
--- a/agent/dns.go
+++ b/agent/dns.go
@@ -718,7 +718,10 @@ func syncExtra(index map[string]dns.RR, resp *dns.Msg) {
func (d *DNSServer) trimTCPResponse(req, resp *dns.Msg) (trimmed bool) {
hasExtra := len(resp.Extra) > 0
// There is some overhead, 65535 does not work
- maxSize := 64000
+ maxSize := 65533 // 64k - 2 bytes
+ // In order to compute properly, we have to avoid compress first
+ compressed := resp.Compress
+ resp.Compress = false
// We avoid some function calls and allocations by only handling the
// extra data when necessary.
@@ -745,6 +748,8 @@ func (d *DNSServer) trimTCPResponse(req, resp *dns.Msg) (trimmed bool) {
len(resp.Answer), originalNumRecords, resp.Len(), originalSize)
}
+ // Restore compression if any
+ resp.Compress = compressed
return truncated
}
diff --git a/agent/dns_test.go b/agent/dns_test.go
index 18da89439fcd..cf9571de05cc 100644
--- a/agent/dns_test.go
+++ b/agent/dns_test.go
@@ -2800,28 +2800,31 @@ func TestDNS_TCP_and_UDP_Truncate(t *testing.T) {
for _, qType := range []uint16{dns.TypeANY, dns.TypeA, dns.TypeSRV} {
for _, question := range questions {
for _, protocol := range protocols {
- t.Run(fmt.Sprintf("lookup %s %s (qType:=%d)", question, protocol, qType), func(t *testing.T) {
- m := new(dns.Msg)
- m.SetQuestion(question, dns.TypeANY)
- if protocol == "udp" {
- m.SetEdns0(8192, true)
- }
- c := new(dns.Client)
- c.Net = protocol
- in, out, err := c.Exchange(m, a.DNSAddr())
- if err != nil && err != dns.ErrTruncated {
- t.Fatalf("err: %v", err)
- }
-
- // Check for the truncate bit
- shouldBeTruncated := numServices > 4095
-
- if shouldBeTruncated != in.Truncated {
- info := fmt.Sprintf("service %s question:=%s (%s) (%d total records) in %v",
- service, question, protocol, numServices, out)
- t.Fatalf("Should have truncate:=%v for %s", shouldBeTruncated, info)
- }
- })
+ for _, compress := range []bool{true, false} {
+ t.Run(fmt.Sprintf("lookup %s %s (qType:=%d) compressed=%b", question, protocol, qType, compress), func(t *testing.T) {
+ m := new(dns.Msg)
+ m.SetQuestion(question, dns.TypeANY)
+ if protocol == "udp" {
+ m.SetEdns0(8192, true)
+ }
+ c := new(dns.Client)
+ c.Net = protocol
+ m.Compress = compress
+ in, out, err := c.Exchange(m, a.DNSAddr())
+ if err != nil && err != dns.ErrTruncated {
+ t.Fatalf("err: %v", err)
+ }
+
+ // Check for the truncate bit
+ shouldBeTruncated := numServices > 4095
+
+ if shouldBeTruncated != in.Truncated {
+ info := fmt.Sprintf("service %s question:=%s (%s) (%d total records) in %v",
+ service, question, protocol, numServices, out)
+ t.Fatalf("Should have truncate:=%v for %s", shouldBeTruncated, info)
+ }
+ })
+ }
}
}
}
From ce3f47a75d20989a87f7fd515a9a7000d95b133a Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Thu, 8 Mar 2018 00:26:41 +0100
Subject: [PATCH 4/7] Performance optimization for services having more than 2k
records
---
agent/dns.go | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/agent/dns.go b/agent/dns.go
index 2750ed6b02ad..73d6a30a00b6 100644
--- a/agent/dns.go
+++ b/agent/dns.go
@@ -726,12 +726,19 @@ func (d *DNSServer) trimTCPResponse(req, resp *dns.Msg) (trimmed bool) {
// We avoid some function calls and allocations by only handling the
// extra data when necessary.
var index map[string]dns.RR
+ originalSize := resp.Len()
+ originalNumRecords := len(resp.Answer)
+
+ // Beyond 2500 records, performance gets bad
+ // Limit the number of records at once, anyway, it won't fit in 64k
+ // For SRV Records, the max is around 500 records, for A, less than 2k
+ if len(resp.Answer) > 2048 {
+ resp.Answer = resp.Answer[:2048]
+ }
if hasExtra {
index = make(map[string]dns.RR, len(resp.Extra))
indexRRs(resp.Extra, index)
}
- originalSize := resp.Len()
- originalNumRecords := len(resp.Answer)
truncated := false
// This enforces the given limit on 64k, the max limit for DNS messages
@@ -746,7 +753,6 @@ func (d *DNSServer) trimTCPResponse(req, resp *dns.Msg) (trimmed bool) {
d.logger.Printf("[DEBUG] dns: TCP answer to %v too large truncated recs:=%d/%d, size:=%d/%d",
req.Question,
len(resp.Answer), originalNumRecords, resp.Len(), originalSize)
-
}
// Restore compression if any
resp.Compress = compressed
From d0e45f22dfc981ad3bcc61935b50c68da30b764c Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Thu, 8 Mar 2018 00:36:17 +0100
Subject: [PATCH 5/7] Fixed wrong format of debug msg in unit test
---
agent/dns_test.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/agent/dns_test.go b/agent/dns_test.go
index cf9571de05cc..06216759d2ad 100644
--- a/agent/dns_test.go
+++ b/agent/dns_test.go
@@ -2801,7 +2801,7 @@ func TestDNS_TCP_and_UDP_Truncate(t *testing.T) {
for _, question := range questions {
for _, protocol := range protocols {
for _, compress := range []bool{true, false} {
- t.Run(fmt.Sprintf("lookup %s %s (qType:=%d) compressed=%b", question, protocol, qType, compress), func(t *testing.T) {
+ t.Run(fmt.Sprintf("lookup %s %s (qType:=%d) compressed=%v", question, protocol, qType, compress), func(t *testing.T) {
m := new(dns.Msg)
m.SetQuestion(question, dns.TypeANY)
if protocol == "udp" {
From 93fa1f6f492914dac9739ded1fdf0f39bfb16d53 Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Fri, 9 Mar 2018 18:25:29 +0100
Subject: [PATCH 6/7] Optimize size for SRV records, should improve performance
a bit
Stricter Unit tests that checks if truncation was OK.
---
agent/dns.go | 8 ++++++--
agent/dns_test.go | 6 +++---
2 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/agent/dns.go b/agent/dns.go
index 73d6a30a00b6..2b1845b54915 100644
--- a/agent/dns.go
+++ b/agent/dns.go
@@ -732,8 +732,12 @@ func (d *DNSServer) trimTCPResponse(req, resp *dns.Msg) (trimmed bool) {
// Beyond 2500 records, performance gets bad
// Limit the number of records at once, anyway, it won't fit in 64k
// For SRV Records, the max is around 500 records, for A, less than 2k
- if len(resp.Answer) > 2048 {
- resp.Answer = resp.Answer[:2048]
+ truncateAt := 2048
+ if req.Question[0].Qtype == dns.TypeSRV {
+ truncateAt = 640
+ }
+ if len(resp.Answer) > truncateAt {
+ resp.Answer = resp.Answer[:truncateAt]
}
if hasExtra {
index = make(map[string]dns.RR, len(resp.Extra))
diff --git a/agent/dns_test.go b/agent/dns_test.go
index 06216759d2ad..8de23969889a 100644
--- a/agent/dns_test.go
+++ b/agent/dns_test.go
@@ -2818,9 +2818,9 @@ func TestDNS_TCP_and_UDP_Truncate(t *testing.T) {
// Check for the truncate bit
shouldBeTruncated := numServices > 4095
- if shouldBeTruncated != in.Truncated {
- info := fmt.Sprintf("service %s question:=%s (%s) (%d total records) in %v",
- service, question, protocol, numServices, out)
+ if shouldBeTruncated != in.Truncated || len(in.Answer) > 2000 || len(in.Answer) < 1 || in.Len() > 65535 {
+ info := fmt.Sprintf("service %s question:=%s (%s) (%d total records) sz:= %d in %v",
+ service, question, protocol, numServices, len(in.Answer), out)
t.Fatalf("Should have truncate:=%v for %s", shouldBeTruncated, info)
}
})
From aebfcb6767023f0fceaa812ae36b2d7a8043acdb Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Fri, 9 Mar 2018 18:42:13 +0100
Subject: [PATCH 7/7] Fixed minor typo (+ travis tests is unstable)
---
agent/dns_test.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/agent/dns_test.go b/agent/dns_test.go
index 8de23969889a..46a5d08ec64f 100644
--- a/agent/dns_test.go
+++ b/agent/dns_test.go
@@ -2821,7 +2821,7 @@ func TestDNS_TCP_and_UDP_Truncate(t *testing.T) {
if shouldBeTruncated != in.Truncated || len(in.Answer) > 2000 || len(in.Answer) < 1 || in.Len() > 65535 {
info := fmt.Sprintf("service %s question:=%s (%s) (%d total records) sz:= %d in %v",
service, question, protocol, numServices, len(in.Answer), out)
- t.Fatalf("Should have truncate:=%v for %s", shouldBeTruncated, info)
+ t.Fatalf("Should have truncated:=%v for %s", shouldBeTruncated, info)
}
})
}