Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Reading Logs in daignose cli command #1521

Merged
merged 9 commits into from
Sep 22, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,14 @@ cli-install:

.PHONY: cli-upgrade
cli-upgrade:
@echo "Installing odigos from source. version: $(ODIGOS_CLI_VERSION)"
@echo "Upgrading odigos from source. version: $(ODIGOS_CLI_VERSION)"
cd ./cli ; go run -tags=embed_manifests . upgrade --version $(ODIGOS_CLI_VERSION) --yes

.PHONY: cli-diagnose
cli-diagnose:
@echo "Diagnosing cluster data for debugging"
cd ./cli ; go run -tags=embed_manifests . diagnose

.PHONY: api-all
api-all:
make -C api all
Expand Down
216 changes: 216 additions & 0 deletions cli/cmd/diagnose.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
package cmd

import (
"archive/tar"
"compress/gzip"
"context"
"fmt"
"github.com/odigos-io/odigos/cli/cmd/resources"
"github.com/odigos-io/odigos/cli/pkg/kube"
"github.com/spf13/cobra"
"io"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"os"
"path/filepath"
"sync"
"time"
)

const (
logBufferSize = 1024 * 1024 // 1MB buffer size for reading logs in chunks
)

var diagnozeCmd = &cobra.Command{
Use: "diagnose",
Short: "Diagnose Client Cluster",
Long: `Diagnose Client Cluster to identify issues and resolve them. This command is useful for troubleshooting and debugging.`,
Run: func(cmd *cobra.Command, args []string) {
ctx := cmd.Context()
client, err := kube.CreateClient(cmd)
if err != nil {
kube.PrintClientErrorAndExit(err)
}

err = startDiagnose(ctx, client)
if err != nil {
fmt.Printf("The diagnose script crashed on: %v\n", err)
}
},
}

func startDiagnose(ctx context.Context, client *kube.Client) error {
mainTempDir, logTempDir, err := createAllDirs()
if err != nil {
return err
}
defer os.RemoveAll(mainTempDir)

if err := fetchOdigosComponentsLogs(ctx, client, logTempDir); err != nil {
return err
}

if err := createTarGz(mainTempDir); err != nil {
return err
}
return nil
}

func createAllDirs() (string, string, error) {
mainTempDir, err := os.MkdirTemp("", "odigos-diagnose")
if err != nil {
return "", "", err
}

logTempDir := filepath.Join(mainTempDir, "Logs")
err = os.Mkdir(logTempDir, os.ModePerm) // os.ModePerm gives full permissions (0777)
if err != nil {
return "", "", err
}

return mainTempDir, logTempDir, nil
}

func fetchOdigosComponentsLogs(ctx context.Context, client *kube.Client, logDir string) error {
odigosNamespace, err := resources.GetOdigosNamespace(client, ctx)
if err != nil {
return err
}

pods, err := client.CoreV1().Pods(odigosNamespace).List(context.TODO(), metav1.ListOptions{})
if err != nil {
return err
}

var wg sync.WaitGroup

for _, pod := range pods.Items {
wg.Add(1)
go func() {
defer wg.Done()
fetchPodLogs(ctx, client, odigosNamespace, pod, logDir)
}()
}

wg.Wait()

return nil
}

func fetchPodLogs(ctx context.Context, client *kube.Client, odigosNamespace string, pod v1.Pod, logDir string) {
for _, container := range pod.Spec.Containers {
logPrefix := fmt.Sprintf("Fetching logs for Pod: %s, Container: %s, Node: %s", pod.Name, container.Name, pod.Spec.NodeName)
fmt.Printf(logPrefix + "\n")

// Define the log file path for saving compressed logs
logFilePath := filepath.Join(logDir, pod.Name+"_"+container.Name+"_"+pod.Spec.NodeName+".log.gz")
logFile, err := os.OpenFile(logFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0666)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should add here
defer logFile.Close()
Instead of the current code which only closes the file at the happy-path where no errors occur.
This will guarantee the file is close when this function returns.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why Not Use defer Inside the Loop?
In a loop, using defer to close a file might seem like a convenient option, but deferred calls are not executed until the surrounding function returns. If you use defer inside the loop, it will accumulate all the defer calls, and the files will remain open until the function (or main program) exits. This can lead to excessive memory or resource usage, especially with a large number of iterations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok I read a bit and this is a real cool way to solve this.. The anonymous function will be called after opening the file and because it uses defer and created inside the scope of the for-loop -> it will trigger in the end of the loop (succesfully / with error)

func() {
			defer func() {
				if err := file.Close(); err != nil {
					fmt.Printf("Error closing file %s: %v\n", fileName, err)
				}
			}()

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can also extract the code in the loop body to a function and use a defer within that function, that way there is no risc for a long queue of defer functions.

if err != nil {
fmt.Printf(logPrefix+" - Failed - Error creating log file: %v\n", err)
return
}

req := client.CoreV1().Pods(odigosNamespace).GetLogs(pod.Name, &v1.PodLogOptions{})
logStream, err := req.Stream(ctx)
if err != nil {
fmt.Printf(logPrefix+" - Failed - Error creating log stream: %v\n", err)
return
}

if err = saveLogsToGzipFileInBatches(logFile, logStream, logBufferSize); err != nil {
fmt.Printf(logPrefix+" - Failed - Error saving logs to file: %v\n", err)
return
}

logStream.Close()
logFile.Close()
}
}

func saveLogsToGzipFileInBatches(logFile *os.File, logStream io.ReadCloser, bufferSize int) error {
// Create a gzip writer to compress the logs
gzipWriter := gzip.NewWriter(logFile)
defer gzipWriter.Close()

// Read logs in chunks and write them to the file
buffer := make([]byte, bufferSize)
for {
n, err := logStream.Read(buffer)
if n > 0 {
// Write the chunk to the gzip file
if _, err := gzipWriter.Write(buffer[:n]); err != nil {
return err
}
}

if err == io.EOF {
// End of the log stream; break out of the loop
break
}

if err != nil {
return err
}
}

return nil
}

func createTarGz(sourceDir string) error {
timestamp := time.Now().Format("02012006150405")
tarGzFileName := fmt.Sprintf("odigos_debug_%s.tar.gz", timestamp)

tarGzFile, err := os.Create(tarGzFileName)
if err != nil {
return err
}
defer tarGzFile.Close()

gzipWriter := gzip.NewWriter(tarGzFile)
defer gzipWriter.Close()

tarWriter := tar.NewWriter(gzipWriter)
defer tarWriter.Close()

err = filepath.Walk(sourceDir, func(file string, fi os.FileInfo, err error) error {
if err != nil {
return err
}

header, err := tar.FileInfoHeader(fi, fi.Name())
if err != nil {
return err
}

header.Name, err = filepath.Rel(sourceDir, file)
if err != nil {
return err
}

if err := tarWriter.WriteHeader(header); err != nil {
return err
}

if !fi.Mode().IsRegular() {
return nil
}

fileContent, err := os.Open(file)
if err != nil {
return err
}
defer fileContent.Close()

if _, err := io.Copy(tarWriter, fileContent); err != nil {
return err
}

return nil
})

return err
}

func init() {
rootCmd.AddCommand(diagnozeCmd)
}
Loading