Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support command improvements #173

Merged
merged 5 commits into from
Sep 22, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 143 additions & 26 deletions cmd/support/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,20 @@ limitations under the License.
package main

import (
tar "archive/tar"
"archive/tar"
"bufio"
"bytes"
"compress/gzip"
"fmt"
"io"
"os"
"os/exec"
"path"
"path/filepath"
"regexp"
"strings"
"time"

"github.com/pkg/errors"
"github.com/rancher/elemental-operator/pkg/version"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
Expand Down Expand Up @@ -72,7 +73,7 @@ var hostName = "unknown"
func main() {
cmd := &cobra.Command{
Use: "elemental-support",
Short: "Gathers logs about the running syste",
Short: "Gathers logs about the running system",
Long: "elemental-support tries to gather as much info as possible with logs about the running system for troubleshooting purposes",
RunE: func(_ *cobra.Command, args []string) error {
if viper.GetBool("debug") {
Expand Down Expand Up @@ -122,7 +123,9 @@ func run() (err error) {
// any sensitive fields?
for _, f := range []string{elementalAgentPlanDir, rancherAgentPlanDir, systemOEMDir, oemDir} {
logrus.Infof("Copying dir %s", f)
copyFilesInDir(f, tempDir)
// Full dest is the /tmp dir + the full real path of the source, so we store the paths as they are in the node
fullDest := filepath.Join(tempDir, f)
copyFilesInDir(f, fullDest)
}

for _, service := range getServices() {
Expand Down Expand Up @@ -153,10 +156,20 @@ func run() (err error) {
out, _ = cmd.CombinedOutput()
_ = os.WriteFile(fmt.Sprintf("%s/elemental-register.version", tempDir), out, os.ModePerm)

// get k8s info
for _, crd := range []string{"pods", "secrets", "nodes", "services", "deployments"} {
logrus.Infof("Getting k8s info for %s", crd)
getK8sResource(crd, tempDir)
// Check if we have a kubeconfig before starting
if k, err := getKubeConfig(); k != "" && err == nil {
// get k8s info
for _, crd := range []string{"pods", "secrets", "nodes", "services", "deployments"} {
logrus.Infof("Getting k8s info for %s", crd)
getK8sResource(crd, tempDir)
}
// get k8s logs
for _, namespace := range []string{"cattle-system", "kube-system", "ingress-nginx"} {
logrus.Infof("Getting k8s logs for namespace %s", namespace)
getK8sPodsLogs("", namespace, tempDir)
}
} else {
logrus.Warnf("No kubeconfig available, skipping getting k8s items")
}

// All done, pack it up into a nice gzip file
Expand Down Expand Up @@ -250,18 +263,30 @@ func compress(src string, buf io.Writer) error {
return nil
}

func getK8sResource(resource string, dest string) {
func getKubeConfig() (string, error) {
var kubectlconfig string
if existsNoWarn(k3sKubeConfig) {
kubectlconfig = k3sKubeConfig
}
if existsNoWarn(rkeKubeConfig) {
kubectlconfig = rkeKubeConfig
// First use the KUBECONFIG env var if it is set
kubectlconfig = os.Getenv("KUBECONFIG")
if kubectlconfig == "" {
if existsNoWarn(k3sKubeConfig) {
kubectlconfig = k3sKubeConfig
}
if existsNoWarn(rkeKubeConfig) {
kubectlconfig = rkeKubeConfig
}
// This should not happen as far as I understand
if existsNoWarn(k3sKubeConfig) && existsNoWarn(rkeKubeConfig) {
return "", errors.New("both kubeconfig exists for k3s and rke2, maybe the deployment is wrong")
}
}
return kubectlconfig, nil
}

// This should not happen as fast as I understand
if existsNoWarn(k3sKubeConfig) && existsNoWarn(rkeKubeConfig) {
logrus.Warn("Both kubeconfig exists for k3s and rke2, maybe the deployment is wrong....")
func getK8sResource(resource string, dest string) {
kubectlconfig, err := getKubeConfig()
if err != nil {
logrus.Warnf("Could not get kubeconfig, skipping k8s info gathering: %s", err)
return
}

if _, kubectlExists := exec.LookPath("kubectl"); kubectlExists != nil {
Expand All @@ -280,19 +305,56 @@ func getK8sResource(resource string, dest string) {
logrus.Warnf("Failed to get %s", resource)
}
// We still want to write the output if the resource was not found or another issue occurred, as that can shed light on what's going on
_ = os.WriteFile(fmt.Sprintf("%s/%s.log", dest, resource), out, os.ModePerm)
_ = os.WriteFile(fmt.Sprintf("%s/%s-resource.log", dest, resource), out, os.ModePerm)
}
}

// copyFilesInDir copies all files in the source dir to destination path, keeping the name. Follows dirs inside dirs.
func copyFilesInDir(sourceDir string, destpath string) {
destDir := path.Base(destpath)
fullDestDir := fmt.Sprintf("%s/%s", destpath, destDir)
_ = os.MkdirAll(fullDestDir, os.ModeDir)
cmd := exec.Command("cp", "--recursive", sourceDir, fullDestDir)
_, err := cmd.CombinedOutput()
// getK8sPodsLogs gets the logs of the given resource on the given namespace, unless the resource is empty, in which case it
// will get ALL the logs for ALL the pods in a given namespace
func getK8sPodsLogs(resource, namespace, dest string) {
var podNames []string
kubectlconfig, err := getKubeConfig()
if err != nil {
logrus.Warnf("Failed to copy %s to %s", sourceDir, fullDestDir)
logrus.Warnf("Could not get kubeconfig, skipping k8s info gathering: %s", err)
return
}

if _, kubectlExists := exec.LookPath("kubectl"); kubectlExists != nil {
if resource == "" {
cmd := exec.Command(
"kubectl",
"get",
"pods",
"-n",
namespace,
"-o",
"jsonpath='{.items[*].metadata.name}'",
)
out, err := cmd.CombinedOutput()
if err != nil {
logrus.Warnf("Failed to get pod names in namespace %s: %s", namespace, err)
}
podNames = strings.Split(string(out), " ")
} else {
podNames = []string{resource}
}

for _, pod := range podNames {
cmd := exec.Command(
"kubectl",
fmt.Sprintf("--kubeconfig=%s", kubectlconfig),
"logs",
pod,
"-n",
namespace,
)
out, err := cmd.CombinedOutput()
if err != nil {
logrus.Warnf("Failed to get %s on namespace %s: %s", resource, pod, err)
}
// We still want to write the output if the resource was not found or another issue occurred as that can shed info on what's going on
_ = os.WriteFile(fmt.Sprintf("%s/%s-%s-logs.log", dest, resource, pod), out, os.ModePerm)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why did you decide to suppress the error? If that is intentional then suggested change should work too

Suggested change
_ = os.WriteFile(fmt.Sprintf("%s/%s-%s-logs.log", dest, resource, pod), out, os.ModePerm)
os.WriteFile(fmt.Sprintf("%s/%s-%s-logs.log", dest, resource, pod), out, os.ModePerm)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

linting prevents this. Explicitly catch return values

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suppressing errors is because this is a best-effort, catch-all util. We do not care about errors and do not want to show them to the users; we just want to get as many logs as possible from the system.

}
}
}

Expand Down Expand Up @@ -381,3 +443,58 @@ func getServiceLog(service string, dest string) {
}

}

// redactPasswdRe and redactPasswordRe each match a "passwd" / "password" key,
// a separator made of one to three whitespace, '=' or ':' characters, and then
// everything up to the next newline. They are compiled once at package scope
// instead of on every redactPasswords call, since that function runs for each
// file copied by copySingleFile.
var (
	redactPasswdRe   = regexp.MustCompile(`(passwd)([\s=\t\:]{1,3})(.*)`)
	redactPasswordRe = regexp.MustCompile(`(password)([\s=\t\:]{1,3})(.*)`)
)

// redactPasswords returns a copy of input in which the value following any
// "passwd" or "password" key (as usually found in yaml/cloud-config files) is
// replaced by "*****". The key and its separator are kept as they were, and
// input itself is not modified.
func redactPasswords(input []byte) []byte {
	// $1 is the key and $2 the separator; only the value (group 3) is masked.
	mask := []byte("$1$2*****")
	redacted := redactPasswdRe.ReplaceAll(input, mask)
	return redactPasswordRe.ReplaceAll(redacted, mask)
}

// copyFilesInDir copies src to dest. When src is a directory it is handed to
// copyDir, which walks its entries; anything else is handed to copySingleFile.
// os.Lstat is used, so a symlink is inspected itself rather than its target.
// If src cannot be inspected, a warning is logged and nothing is copied.
func copyFilesInDir(src, dest string) {
	stat, statErr := os.Lstat(src)
	if statErr != nil {
		logrus.Warnf("Error opening %s", src)
		return
	}

	if stat.IsDir() {
		copyDir(src, dest)
		return
	}
	copySingleFile(src, dest)
}

// copySingleFile writes a password-redacted copy of the file src to dest.
// The parent directory of dest is created if needed. Any failure (read,
// mkdir or write) is logged as a warning and aborts the copy; no error is
// returned to the caller.
func copySingleFile(src string, dest string) {
	logrus.Debugf("Copying %s into %s", src, dest)

	data, readErr := os.ReadFile(src)
	if readErr != nil {
		logrus.Warnf("Failed to read: %s", readErr)
		return
	}

	// Redact before anything touches the destination so unredacted content
	// is never written out.
	clean := redactPasswords(data)

	parent := filepath.Dir(dest)
	if mkErr := os.MkdirAll(parent, os.ModeDir|os.ModePerm); mkErr != nil {
		logrus.Warnf("Failed to create dir: %s", mkErr)
		return
	}

	if writeErr := os.WriteFile(dest, clean, os.ModePerm); writeErr != nil {
		logrus.Warnf("Failed to write: %s", writeErr)
	}
}

func copyDir(srcdir string, destdir string) {
contents, err := os.ReadDir(srcdir)
if err != nil {
return
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a log entry in case it fails?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above: I don't care if it fails. It's not supposed to give errors to the user, just try to get as much info as possible; otherwise you get an error-spouting util on your already broken system. Bad vibes :)

}
for _, content := range contents {
cs, cd := filepath.Join(srcdir, content.Name()), filepath.Join(destdir, content.Name())
copyFilesInDir(cs, cd)
}
}