Skip to content

ihealth integration #168

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 38 additions & 3 deletions cmd/nginx-supportpkg.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ package cmd
import (
"fmt"
"os"
"path/filepath"
"slices"
"time"

"github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector"
"github.com/nginxinc/nginx-k8s-supportpkg/pkg/jobs"
Expand All @@ -40,7 +42,7 @@ func Execute() {
Short: "nginx-supportpkg - a tool to create Ingress Controller diagnostics package",
Long: `nginx-supportpkg - a tool to create Ingress Controller diagnostics package`,
Run: func(cmd *cobra.Command, args []string) {

startTime := time.Now()
err := data_collector.NewDataCollector(&collector)
if err != nil {
fmt.Println(fmt.Errorf("unable to start data collector: %s", err))
Expand All @@ -66,17 +68,50 @@ func Execute() {

if collector.AllNamespacesExist() {
failedJobs := 0
totalJobs := len(jobList)
var jobTimings []data_collector.JobInfo

for _, job := range jobList {
fmt.Printf("Running job %s...", job.Name)
err, Skipped := job.Collect(&collector)
if Skipped {

// Record job start and end time to calculate duration
jobStartTime := time.Now()
err, skipped, files := job.Collect(&collector)
jobEndTime := time.Now()
duration := jobEndTime.Sub(jobStartTime)

// Create job info record
jobInfo := data_collector.JobInfo{
Name: job.Name,
StartTime: jobStartTime.UTC().Format(time.RFC3339Nano),
EndTime: jobEndTime.UTC().Format(time.RFC3339Nano),
Duration: duration.String(),
Files: files,
}

if skipped {
fmt.Print(" SKIPPED\n")
} else if err != nil {
fmt.Printf(" FAILED: %s\n", err)
failedJobs++
} else {
fmt.Print(" COMPLETED\n")
}

jobTimings = append(jobTimings, jobInfo)
}

// Generate manifest with job timings
manifestData, err := collector.GenerateManifest(product, startTime, totalJobs, failedJobs, jobTimings)
if err != nil {
fmt.Printf("Warning: Failed to generate manifest: %v\n", err)
} else {
// Save manifest to base directory
manifestPath := filepath.Join(collector.BaseDir, "manifest.json")
err = os.WriteFile(manifestPath, manifestData, 0644)
if err != nil {
fmt.Printf("Warning: Failed to write manifest: %v\n", err)
}
}

tarFile, err := collector.WrapUp(product)
Expand Down
103 changes: 102 additions & 1 deletion pkg/data_collector/data_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"bytes"
"compress/gzip"
"context"
"encoding/json"
"fmt"
"io"
"log"
Expand All @@ -33,6 +34,7 @@ import (

helmClient "github.com/mittwald/go-helm-client"
"github.com/nginxinc/nginx-k8s-supportpkg/pkg/crds"
"github.com/nginxinc/nginx-k8s-supportpkg/pkg/version"
corev1 "k8s.io/api/core/v1"
crdClient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -60,6 +62,64 @@ type DataCollector struct {
ExcludeTimeSeriesData bool
}

// Manifest is the top-level document that GenerateManifest serializes to
// JSON. RootDir, Commands and Packages are left out of the encoded output
// when they are empty.
type Manifest struct {
	// Version is the manifest schema version.
	Version string `json:"version"`
	// Timestamp holds the start and stop times of the collection run.
	Timestamp TimestampInfo `json:"ts"`
	// PackageType classifies the package (GenerateManifest uses "root").
	PackageType string `json:"package_type"`
	// RootDir is the package root directory the manifest refers to.
	RootDir string `json:"root_dir,omitempty"`
	// Commands lists one entry per file produced by a job.
	Commands []Command `json:"commands,omitempty"`
	// ProductInfo names the product and the tool version.
	ProductInfo ProductInfo `json:"product_info"`
	// PlatformInfo carries platform details such as namespaces.
	PlatformInfo PlatformInfo `json:"platform_info"`
	// Packages lists nested sub-packages, if any.
	Packages []SubPackage `json:"packages,omitempty"`
}

// TimestampInfo is a start/stop pair of timestamps stored as strings.
// GenerateManifest fills both with UTC times in RFC 3339 (nanosecond) form.
type TimestampInfo struct {
	// Start is when the measured interval began.
	Start string `json:"start"`
	// Stop is when the measured interval ended.
	Stop string `json:"stop"`
}

// Command is one entry of Manifest.Commands. GenerateManifest emits one
// Command for every file a job produced.
type Command struct {
	// Name identifies the command; GenerateManifest stores the job name here.
	Name string `json:"name"`
	// Cwd is the working directory recorded for the command.
	Cwd string `json:"cwd"`
	// Ts holds the start and end timestamps of the command.
	Ts CommandTiming `json:"ts"`
	// Output is the path of the file the command produced.
	Output string `json:"output"`
	// RetCode is the return code; a zero value is omitted from the JSON.
	RetCode int `json:"retcode,omitempty"`
}

// CommandTiming records when a Command started and ended, as strings.
type CommandTiming struct {
	// Start is the timestamp at which the command began.
	Start string `json:"start"`
	// End is the timestamp at which the command finished.
	End string `json:"end"`
}

// ProductInfo identifies the product a manifest was generated for.
type ProductInfo struct {
	// Product is the product name passed to GenerateManifest.
	Product string `json:"product"`
	// Version is a version string; GenerateManifest stores version.Version.
	Version string `json:"version"`
}

// PlatformInfo describes the platform the data was collected from. Both
// fields are omitted from the JSON output when empty.
type PlatformInfo struct {
	// Add platform-specific fields as needed

	// K8sVersion is the Kubernetes version string.
	K8sVersion string `json:"k8s_version,omitempty"`
	// Namespaces lists namespace names; GenerateManifest copies the
	// collector's Namespaces here.
	Namespaces []string `json:"namespaces,omitempty"`
}

// SubPackage is one entry of Manifest.Packages. Name and ID are omitted
// from the JSON output when empty.
type SubPackage struct {
	// Path is the location of the sub-package.
	Path string `json:"path"`
	// Ts holds the start and stop timestamps for the sub-package.
	Ts TimestampInfo `json:"ts"`
	// SubPackageType classifies the sub-package.
	SubPackageType string `json:"sub_package_type"`
	// Name is an optional name for the sub-package.
	Name string `json:"name,omitempty"`
	// ID is an optional identifier for the sub-package.
	ID string `json:"id,omitempty"`
}

// JobInfo records the outcome of a single collection job: when it ran, how
// long it took, and which files it produced. GenerateManifest converts these
// records into Command entries.
type JobInfo struct {
	// Name is the job's name.
	Name string `json:"name"`
	// StartTime is the formatted time at which the job started.
	StartTime string `json:"start_time"`
	// EndTime is the formatted time at which the job finished.
	EndTime string `json:"end_time"`
	// Duration is the elapsed time rendered as a string.
	Duration string `json:"duration"`
	// Status is one of "completed", "failed" or "skipped".
	Status string `json:"status"`
	// Error is an error message; omitted from the JSON when empty.
	Error string `json:"error,omitempty"`
	// Files is the list of files generated by the job; omitted when empty.
	Files []string `json:"files,omitempty"`
}

func NewDataCollector(collector *DataCollector) error {

tmpDir, err := os.MkdirTemp("", "-pkg-diag")
Expand Down Expand Up @@ -108,7 +168,7 @@ func (c *DataCollector) WrapUp(product string) (string, error) {
unixTime := time.Now().Unix()
unixTimeString := strconv.FormatInt(unixTime, 10)
tarballName := fmt.Sprintf("%s-supportpkg-%s.tar.gz", product, unixTimeString)
tarballRootDirName := fmt.Sprintf("%s-supportpkg-%s", product, unixTimeString)
tarballRootDirName := "."

err := c.LogFile.Close()
if err != nil {
Expand Down Expand Up @@ -266,3 +326,44 @@ func (c *DataCollector) AllNamespacesExist() bool {

return allExist
}

// GenerateManifest builds the package manifest and returns it as indented
// JSON.
//
// The manifest carries schema version "1.2", package type "root", the
// collection window (startTime up to the moment of this call, both rendered
// in UTC as RFC 3339 with nanoseconds), the product name together with
// version.Version, and the collector's namespaces. Each file listed in
// jobTimings becomes one Command that reuses its job's name and timestamps;
// a job without files contributes no Command. RetCode is 1 for commands
// whose job Status is "failed" and is left at zero otherwise.
//
// jobsRun and jobsFailed are accepted but not used by this function.
func (c *DataCollector) GenerateManifest(product string, startTime time.Time, jobsRun, jobsFailed int, jobTimings []JobInfo) ([]byte, error) {
	window := TimestampInfo{
		Start: startTime.UTC().Format(time.RFC3339Nano),
		Stop:  time.Now().UTC().Format(time.RFC3339Nano),
	}

	// Flatten the per-job file lists into the manifest's command format.
	commands := []Command{}
	for i := range jobTimings {
		info := &jobTimings[i]

		// The return code depends only on the job, so compute it once per job.
		retCode := 0
		if info.Status == "failed" {
			retCode = 1
		}

		for _, output := range info.Files {
			commands = append(commands, Command{
				Name:    info.Name,
				Cwd:     ".",
				Ts:      CommandTiming{Start: info.StartTime, End: info.EndTime},
				Output:  output,
				RetCode: retCode,
			})
		}
	}

	doc := Manifest{
		Version:      "1.2", // Match the schema version
		Timestamp:    window,
		PackageType:  "root", // As defined in schema enum
		RootDir:      ".",
		Commands:     commands,
		ProductInfo:  ProductInfo{Product: product, Version: version.Version},
		PlatformInfo: PlatformInfo{Namespaces: c.Namespaces},
	}

	return json.MarshalIndent(doc, "", " ")
}
36 changes: 26 additions & 10 deletions pkg/jobs/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"time"

"github.com/nginxinc/nginx-k8s-supportpkg/pkg/data_collector"
Expand All @@ -40,7 +41,7 @@ type JobResult struct {
Skipped bool
}

func (j Job) Collect(dc *data_collector.DataCollector) (error, bool) {
func (j Job) Collect(dc *data_collector.DataCollector) (error, bool, []string) {
ch := make(chan JobResult, 1)

ctx, cancel := context.WithTimeout(context.Background(), j.Timeout)
Expand All @@ -52,32 +53,47 @@ func (j Job) Collect(dc *data_collector.DataCollector) (error, bool) {
select {
case <-ctx.Done():
dc.Logger.Printf("\tJob %s has timed out: %s\n---\n", j.Name, ctx.Err())
return fmt.Errorf("Context cancelled: %v", ctx.Err()), false
return fmt.Errorf("Context cancelled: %v", ctx.Err()), false, nil

case jobResults := <-ch:
files := j.GetFilesFromJobResult(dc, jobResults)
if jobResults.Skipped {
dc.Logger.Printf("\tJob %s has been skipped\n---\n", j.Name)
return nil, true
}
if jobResults.Error != nil {
dc.Logger.Printf("\tJob %s has failed: %s\n", j.Name, jobResults.Error)
return jobResults.Error, false
return nil, true, files
}

for fileName, fileValue := range jobResults.Files {
err := os.MkdirAll(filepath.Dir(fileName), os.ModePerm)
if err != nil {
return fmt.Errorf("MkdirAll failed: %v", err), jobResults.Skipped
return fmt.Errorf("MkdirAll failed: %v", err), jobResults.Skipped, files
}
file, _ := os.Create(fileName)
_, err = file.Write(fileValue)
if err != nil {
return fmt.Errorf("Write failed: %v", err), jobResults.Skipped
return fmt.Errorf("Write failed: %v", err), jobResults.Skipped, files
}
_ = file.Close()
dc.Logger.Printf("\tJob %s wrote %d bytes to %s\n", j.Name, len(fileValue), fileName)
}

if jobResults.Error != nil {
dc.Logger.Printf("\tJob %s has failed: %s\n", j.Name, jobResults.Error)
fmt.Printf("Files collected so far: %v\n", files)
return jobResults.Error, false, files
}

dc.Logger.Printf("\tJob %s completed successfully\n---\n", j.Name)
return nil, jobResults.Skipped
return nil, false, files
}
}

// GetFilesFromJobResult returns the file names recorded in jobResult.Files,
// each with the leading dc.BaseDir prefix removed. Entries whose name is
// empty are skipped. The result is never nil; it is empty when the job
// recorded no files.
//
// NOTE(review): the names come from ranging over jobResult.Files, so if that
// is a map (it is indexed by path elsewhere in this package) the order of the
// returned slice is not stable between calls.
func (j Job) GetFilesFromJobResult(dc *data_collector.DataCollector, jobResult JobResult) []string {
	paths := make([]string, 0, len(jobResult.Files))
	for name := range jobResult.Files {
		if name == "" {
			continue
		}
		// Report the path relative to the package base directory.
		paths = append(paths, strings.TrimPrefix(name, dc.BaseDir))
	}
	return paths
}
9 changes: 1 addition & 8 deletions pkg/jobs/nim_job_list.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ package jobs

import (
"context"
"os"
"path/filepath"
"strings"
"time"
Expand Down Expand Up @@ -246,13 +245,7 @@ func NIMJobList() []Job {
jobResult.Error = err
dc.Logger.Printf("\tFailed to copy dumped file %s from pod %s in namespace %s to %s: %v\n", config.outputFile, pod.Name, namespace, destPathFilename, err)
} else {
err = os.WriteFile(destPathFilename, fileContent, 0644)
if err != nil {
jobResult.Error = err
dc.Logger.Printf("\tFailed to write file to %s: %v\n", destPathFilename, err)
} else {
dc.Logger.Printf("\tSuccessfully copied dumped file %s from pod %s in namespace %s to %s\n", config.outputFile, pod.Name, namespace, destPathFilename)
}
jobResult.Files[destPathFilename] = fileContent
}

// Remove/delete the dumped file from the pod
Expand Down