Skip to content

Commit

Permalink
feat: [sc-110727] troubleshoot: collector/analyzer for wildcard dns (#…
Browse files Browse the repository at this point in the history
…1606)

* store DNS collector output as JSON for later analysis

* fix incorrect path

* configurable dns image

* make non resolvable domain configurable

* nit update address field

* * update dns util image
* add unit test
  • Loading branch information
nvanthao authored Sep 11, 2024
1 parent f662161 commit 7484b10
Show file tree
Hide file tree
Showing 9 changed files with 176 additions and 14 deletions.
4 changes: 4 additions & 0 deletions config/crds/troubleshoot.sh_collectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,10 @@ spec:
type: string
exclude:
type: BoolString
image:
type: string
nonResolvable:
type: string
timeout:
type: string
type: object
Expand Down
4 changes: 4 additions & 0 deletions config/crds/troubleshoot.sh_preflights.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2027,6 +2027,10 @@ spec:
type: string
exclude:
type: BoolString
image:
type: string
nonResolvable:
type: string
timeout:
type: string
type: object
Expand Down
4 changes: 4 additions & 0 deletions config/crds/troubleshoot.sh_supportbundles.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2058,6 +2058,10 @@ spec:
type: string
exclude:
type: BoolString
image:
type: string
nonResolvable:
type: string
timeout:
type: string
type: object
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/troubleshoot/v1beta2/collector_shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@ type Sonobuoy struct {
// DNS configures the DNS collector, which runs DNS lookups from a pod
// inside the cluster and records the results for later analysis.
type DNS struct {
CollectorMeta `json:",inline" yaml:",inline"`
// Timeout is a string-encoded time limit for the collector.
// NOTE(review): presumably a Go duration string such as "30s"; the parsing
// is not visible here, so confirm against CollectDNS.Collect.
Timeout string `json:"timeout,omitempty" yaml:"timeout,omitempty"`
// Image is the container image used for the troubleshooting pod. When it
// is empty the collector falls back to its built-in default image.
Image string `json:"image,omitempty" yaml:"image,omitempty"`
// NonResolvable is the domain name that is expected NOT to resolve; it is
// queried from the pod to detect wildcard DNS. When it is empty the
// collector falls back to its built-in default domain.
NonResolvable string `json:"nonResolvable,omitempty" yaml:"nonResolvable,omitempty"`
}

type Etcd struct {
Expand Down
117 changes: 103 additions & 14 deletions pkg/collect/dns.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
package collect

import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"path/filepath"
"strings"
"time"

Expand All @@ -21,7 +21,8 @@ import (
)

const (
dnsUtilsImage = "registry.k8s.io/e2e-test-images/jessie-dnsutils:1.3"
dnsUtilsImage = "registry.k8s.io/e2e-test-images/agnhost:2.39"
nonResolvableDomain = "non-existent-domain"
)

type CollectDNS struct {
Expand All @@ -34,6 +35,25 @@ type CollectDNS struct {
RBACErrors
}

// DNSTroubleshootResult represents the structure of the DNS troubleshooting JSON data.
// The DNS collector fills it in while collecting and saves it, marshalled as
// JSON, next to the raw text output so that it can be analyzed later.
type DNSTroubleshootResult struct {
// KubernetesClusterIP is the cluster IP reported by getKubernetesClusterIP;
// it stays empty when that lookup fails.
KubernetesClusterIP string `json:"kubernetesClusterIP"`
// PodResolvConf holds the lines of the "/etc/resolv.conf" section of the
// troubleshooting pod's log, each terminated by "\n".
PodResolvConf string `json:"podResolvConf"`
// Query holds the results of the dig lookups performed inside the pod.
Query struct {
// Kubernetes is the lookup of the "kubernetes" name.
Kubernetes struct {
Name string `json:"name"`
Address string `json:"address"`
} `json:"kubernetes"`
// NonResolvableDomain is the lookup of a domain that is expected not to
// resolve. Name is set by the collector to the domain it queried; a
// non-empty Address means the lookup returned output.
NonResolvableDomain struct {
Name string `json:"name"`
Address string `json:"address"`
} `json:"nonResolvableDomain"`
} `json:"query"`
// KubeDNSPods is the result of getRunningKubeDNSPodNames split on ", ".
KubeDNSPods []string `json:"kubeDNSPods"`
// KubeDNSService is the string returned by getKubeDNSServiceClusterIP.
KubeDNSService string `json:"kubeDNSService"`
// KubeDNSEndpoints is the string returned by getKubeDNSEndpoints.
KubeDNSEndpoints string `json:"kubeDNSEndpoints"`
}

func (c *CollectDNS) Title() string {
return getCollectorName(c)
}
Expand All @@ -48,32 +68,57 @@ func (c *CollectDNS) Collect(progressChan chan<- interface{}) (CollectorResult,
defer cancel()

sb := strings.Builder{}
dnsDebug := DNSTroubleshootResult{}

// get kubernetes Cluster IP
clusterIP, err := getKubernetesClusterIP(c.Client, ctx)
if err == nil {
sb.WriteString(fmt.Sprintf("=== Kubernetes Cluster IP from API Server: %s\n", clusterIP))
dnsDebug.KubernetesClusterIP = clusterIP
} else {
sb.WriteString(fmt.Sprintf("=== Failed to detect Kubernetes Cluster IP: %v\n", err))
}

// run a pod and perform DNS lookup
podLog, err := troubleshootDNSFromPod(c.Client, ctx)
testDomain := c.Collector.NonResolvable
if testDomain == "" {
testDomain = nonResolvableDomain
}
dnsDebug.Query.NonResolvableDomain.Name = testDomain

image := c.Collector.Image
if image == "" {
image = dnsUtilsImage
}

podLog, err := troubleshootDNSFromPod(c.Client, ctx, testDomain, image)
if err == nil {
sb.WriteString(fmt.Sprintf("=== Test DNS resolution in pod %s: \n", dnsUtilsImage))
sb.WriteString(fmt.Sprintf("=== Test DNS resolution in pod %s: \n", image))
sb.WriteString(podLog)
} else {
sb.WriteString(fmt.Sprintf("=== Failed to run commands from pod: %v\n", err))
}

// extract DNS queries from pod log
err = extractDNSQueriesFromPodLog(podLog, &dnsDebug)
if err != nil {
sb.WriteString(fmt.Sprintf("=== Failed to extract DNS queries from pod log: %v\n", err))
}

// are DNS pods running?
sb.WriteString(fmt.Sprintf("=== Running kube-dns pods: %s\n", getRunningKubeDNSPodNames(c.Client, ctx)))
kubeDNSPods := getRunningKubeDNSPodNames(c.Client, ctx)
sb.WriteString(fmt.Sprintf("=== Running kube-dns pods: %s\n", kubeDNSPods))
dnsDebug.KubeDNSPods = strings.Split(kubeDNSPods, ", ")

// is DNS service up?
sb.WriteString(fmt.Sprintf("=== Running kube-dns service: %s\n", getKubeDNSServiceClusterIP(c.Client, ctx)))
kubeDNSService := getKubeDNSServiceClusterIP(c.Client, ctx)
sb.WriteString(fmt.Sprintf("=== Running kube-dns service: %s\n", kubeDNSService))
dnsDebug.KubeDNSService = kubeDNSService

// are DNS endpoints exposed?
sb.WriteString(fmt.Sprintf("=== kube-dns endpoints: %s\n", getKubeDNSEndpoints(c.Client, ctx)))
kubeDNSEndpoints := getKubeDNSEndpoints(c.Client, ctx)
sb.WriteString(fmt.Sprintf("=== kube-dns endpoints: %s\n", kubeDNSEndpoints))
dnsDebug.KubeDNSEndpoints = kubeDNSEndpoints

// get DNS server config
coreDNSConfig, err := getCoreDNSConfig(c.Client, ctx)
Expand All @@ -89,7 +134,16 @@ func (c *CollectDNS) Collect(progressChan chan<- interface{}) (CollectorResult,

data := sb.String()
output := NewResult()
output.SaveResult(c.BundlePath, filepath.Join("dns", c.Collector.CollectorName), bytes.NewBuffer([]byte(data)))

// save raw debug output
output.SaveResult(c.BundlePath, "dns/debug.txt", bytes.NewBuffer([]byte(data)))

// save structured debug output as JSON file
jsonData, err := json.Marshal(dnsDebug)
if err != nil {
return output, errors.Wrap(err, "failed to marshal DNS troubleshooting data")
}
output.SaveResult(c.BundlePath, "dns/debug.json", bytes.NewBuffer(jsonData))

return output, nil
}
Expand All @@ -104,14 +158,17 @@ func getKubernetesClusterIP(client kubernetes.Interface, ctx context.Context) (s
return service.Spec.ClusterIP, nil
}

func troubleshootDNSFromPod(client kubernetes.Interface, ctx context.Context) (string, error) {
func troubleshootDNSFromPod(client kubernetes.Interface, ctx context.Context, nonResolvableDomain string, image string) (string, error) {
namespace := "default"
command := []string{"/bin/sh", "-c", `
set -x
command := []string{"/bin/sh", "-c", fmt.Sprintf(`
echo "=== /etc/resolv.conf ==="
cat /etc/resolv.conf
nslookup -debug kubernetes
echo "=== dig kubernetes ==="
dig +search +short kubernetes
echo "=== dig non-existent-domain ==="
dig +short %s
exit 0
`}
`, nonResolvableDomain)}

// TODO: image pull secret?
podLabels := map[string]string{
Expand All @@ -127,7 +184,7 @@ func troubleshootDNSFromPod(client kubernetes.Interface, ctx context.Context) (s
Containers: []corev1.Container{
{
Name: "troubleshoot-dns",
Image: dnsUtilsImage,
Image: image,
Command: command,
},
},
Expand Down Expand Up @@ -271,3 +328,35 @@ func getKubeDNSEndpoints(client kubernetes.Interface, ctx context.Context) strin

return strings.Join(endpointStrings, ", ")
}

// extractDNSQueriesFromPodLog parses the log produced by the DNS
// troubleshooting pod and stores the findings in dnsDebug.
//
// The log is expected to be split into sections by the marker lines that the
// pod's script echoes:
//
//	=== /etc/resolv.conf ===         -> dnsDebug.PodResolvConf (all lines, "\n"-terminated)
//	=== dig kubernetes ===           -> dnsDebug.Query.Kubernetes
//	=== dig non-existent-domain ===  -> dnsDebug.Query.NonResolvableDomain.Address
//
// Lines that appear before the first marker are ignored. For the two dig
// sections the last answer line wins; blank lines and dig diagnostics (lines
// starting with ";", e.g. ";; connection timed out") are not answers and are
// skipped so that they can neither erase nor masquerade as an address.
// Query.Kubernetes.Name is set as soon as the section marker is seen, because
// the query name is known even when the lookup returns nothing.
//
// It returns an error when dnsDebug is nil or when the log cannot be scanned
// completely (for example, a line longer than the scanner's buffer).
func extractDNSQueriesFromPodLog(podLog string, dnsDebug *DNSTroubleshootResult) error {
	if dnsDebug == nil {
		return fmt.Errorf("dnsDebug must not be nil")
	}

	const (
		sectionResolvConf    = "podResolvConf"
		sectionKubernetes    = "kubernetes"
		sectionNonResolvable = "nonResolvableDomain"
	)

	scanner := bufio.NewScanner(strings.NewReader(podLog))

	var currentSection string

	for scanner.Scan() {
		line := scanner.Text()

		// Marker lines only switch the section; they carry no data.
		switch {
		case strings.Contains(line, "=== /etc/resolv.conf ==="):
			currentSection = sectionResolvConf
			continue
		case strings.Contains(line, "=== dig kubernetes ==="):
			currentSection = sectionKubernetes
			dnsDebug.Query.Kubernetes.Name = "kubernetes"
			continue
		case strings.Contains(line, "=== dig non-existent-domain ==="):
			currentSection = sectionNonResolvable
			continue
		}

		switch currentSection {
		case sectionResolvConf:
			// Keep resolv.conf verbatim, including blank lines.
			dnsDebug.PodResolvConf += line + "\n"
		case sectionKubernetes:
			if answer, ok := digAnswerFromLine(line); ok {
				dnsDebug.Query.Kubernetes.Address = answer
			}
		case sectionNonResolvable:
			if answer, ok := digAnswerFromLine(line); ok {
				dnsDebug.Query.NonResolvableDomain.Address = answer
			}
		}
	}

	// Scan stops silently on errors such as bufio.ErrTooLong; surface them
	// instead of returning a partially parsed result as success.
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("scanning DNS pod log: %w", err)
	}

	return nil
}

// digAnswerFromLine returns the whitespace-trimmed answer contained in one
// line of `dig +short` output. ok is false for blank lines and for dig's
// diagnostic/comment lines, which begin with ";".
func digAnswerFromLine(line string) (answer string, ok bool) {
	answer = strings.TrimSpace(line)
	if answer == "" || strings.HasPrefix(answer, ";") {
		return "", false
	}
	return answer, true
}
41 changes: 41 additions & 0 deletions pkg/collect/dns_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"testing"

"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/fake"
Expand Down Expand Up @@ -39,3 +40,43 @@ func TestGetKubernetesClusterIP(t *testing.T) {
t.Errorf("expected %s, got %s", k8sSvcIp, clusterIP)
}
}

// TestExtractDNSQueriesFromPodLog feeds a representative pod log through
// extractDNSQueriesFromPodLog and checks each parsed field: the resolv.conf
// section is captured line by line, the kubernetes query gets its name and
// address, and an empty non-resolvable section leaves that query untouched.
func TestExtractDNSQueriesFromPodLog(t *testing.T) {
	const podLog = `
=== /etc/resolv.conf ===
search default.svc.cluster.local svc.cluster.local cluster.local
nameserver 10.43.0.10
options ndots:5
=== dig kubernetes ===
10.43.0.1
=== dig non-existent-domain ===`

	const wantResolvConf = `search default.svc.cluster.local svc.cluster.local cluster.local
nameserver 10.43.0.10
options ndots:5
`

	parsed := DNSTroubleshootResult{}
	parseErr := extractDNSQueriesFromPodLog(podLog, &parsed)
	assert.NoError(t, parseErr)

	assert.Equal(t, wantResolvConf, parsed.PodResolvConf)

	// The kubernetes lookup returned a single address.
	assert.Equal(t, "kubernetes", parsed.Query.Kubernetes.Name)
	assert.Equal(t, "10.43.0.1", parsed.Query.Kubernetes.Address)

	// Nothing follows the non-resolvable marker, so both fields stay empty.
	assert.Equal(t, "", parsed.Query.NonResolvableDomain.Name)
	assert.Equal(t, "", parsed.Query.NonResolvableDomain.Address)
}
6 changes: 6 additions & 0 deletions schemas/collector-troubleshoot-v1beta2.json
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,12 @@
"exclude": {
"oneOf": [{"type": "string"},{"type": "boolean"}]
},
"image": {
"type": "string"
},
"nonResolvable": {
"type": "string"
},
"timeout": {
"type": "string"
}
Expand Down
6 changes: 6 additions & 0 deletions schemas/preflight-troubleshoot-v1beta2.json
Original file line number Diff line number Diff line change
Expand Up @@ -3063,6 +3063,12 @@
"exclude": {
"oneOf": [{"type": "string"},{"type": "boolean"}]
},
"image": {
"type": "string"
},
"nonResolvable": {
"type": "string"
},
"timeout": {
"type": "string"
}
Expand Down
6 changes: 6 additions & 0 deletions schemas/supportbundle-troubleshoot-v1beta2.json
Original file line number Diff line number Diff line change
Expand Up @@ -3109,6 +3109,12 @@
"exclude": {
"oneOf": [{"type": "string"},{"type": "boolean"}]
},
"image": {
"type": "string"
},
"nonResolvable": {
"type": "string"
},
"timeout": {
"type": "string"
}
Expand Down

0 comments on commit 7484b10

Please sign in to comment.