Filtering old data out of samples #279

Open · wants to merge 9 commits into base: master (showing changes from 5 commits)
4 changes: 2 additions & 2 deletions charts/metrics-agent/Chart.yaml
@@ -14,8 +14,8 @@ type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
version: 2.11.33
version: 2.11.35

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application.
appVersion: 2.11.33
appVersion: 2.11.35
2 changes: 1 addition & 1 deletion charts/metrics-agent/values.yaml
@@ -24,7 +24,7 @@ pollInterval: 180

image:
  name: cloudability/metrics-agent
  tag: 2.11.33
  tag: 2.11.35
  pullPolicy: Always

imagePullSecrets: []
47 changes: 39 additions & 8 deletions kubernetes/kubernetes_test.go
@@ -1,6 +1,7 @@
package kubernetes

import (
	"bytes"
	"context"
	"encoding/json"
	"net/http"
@@ -400,6 +401,24 @@ func TestCollectMetrics(t *testing.T) {
			return nil
		})
	})
	t.Run("Ensure that resources are properly filtered out", func(t *testing.T) {
		filepath.Walk(ka.msExportDirectory.Name(), func(path string, info os.FileInfo, err error) error {
			if err != nil {
				t.Error(err)
			}
			if strings.Contains(info.Name(), "jsonl") {
				in, err := os.ReadFile(path)
				if err != nil {
					t.Error(err)
				}
				fileLen := strings.Count(string(in), "\n")
				if fileLen != 1 {
					t.Errorf("Expected 1 entry in file %s, got %d", info.Name(), fileLen)
				}
			}
			return nil
		})
	})
	t.Run("Ensure collection occurs with parseMetrics enabled"+
		"ensure sensitive data is stripped", func(t *testing.T) {
		err = kubeAgentParseMetrics.collectMetrics(context.TODO(), kubeAgentParseMetrics, cs, fns)
@@ -594,25 +613,37 @@ func getMockInformers(clusterVersion float64, stopCh chan struct{}) (map[string]
	nodes.Add(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "n1", Annotations: annotation}})
	persistentVolumes.Add(&v1.PersistentVolume{ObjectMeta: metav1.ObjectMeta{Name: "pv1", Annotations: annotation}})
	persistentVolumeClaims.Add(&v1.PersistentVolumeClaim{ObjectMeta: metav1.ObjectMeta{Name: "pvc1", Annotations: annotation}})
	replicaSets.Add(&v1apps.ReplicaSet{ObjectMeta: metav1.ObjectMeta{Name: "rs1", Annotations: annotation}})
	replicaSets.Add(&v1apps.ReplicaSet{ObjectMeta: metav1.ObjectMeta{Name: "rs1", Annotations: annotation},
		Status: v1apps.ReplicaSetStatus{Replicas: int32(1)}})
	// should not be exported as replicaset is empty
	replicaSets.Add(&v1apps.ReplicaSet{ObjectMeta: metav1.ObjectMeta{Name: "rs2", Annotations: annotation}})
	daemonSets.Add(&v1apps.DaemonSet{ObjectMeta: metav1.ObjectMeta{Name: "ds1", Annotations: annotation}})
	jobs.Add(&v1batch.Job{ObjectMeta: metav1.ObjectMeta{Name: "job1", Annotations: annotation}})
	oneDayAgo := metav1.NewTime(time.Now().Add(-24 * time.Hour))
	// should not be exported as job was completed some time ago
	jobs.Add(&v1batch.Job{ObjectMeta: metav1.ObjectMeta{Name: "job2", Annotations: annotation},
		Status: v1batch.JobStatus{CompletionTime: &oneDayAgo}})
	// should not be exported as job failed some time ago
jobs.Add(&v1batch.Job{ObjectMeta: metav1.ObjectMeta{Name: "job3", Annotations: annotation},
Status: v1batch.JobStatus{Failed: int32(1), StartTime: &oneDayAgo}})
if clusterVersion > 1.20 {
cronJobs.Add(&v1batch.CronJob{ObjectMeta: metav1.ObjectMeta{Name: "cj1", Annotations: annotation}})
}

// pods is unique as we use this pod file for parseMetrics testing
// for parseMetricData testing, add a cldy metrics-agent pod to the mock informers
// adds 3 pods, two of which completed long ago and will not be added to export sample
podData, err := os.ReadFile("../testdata/pods.jsonl")
if err != nil {
return nil, err
}
var myPod *v1.Pod
err = json.Unmarshal(podData, &myPod)
if err != nil {
return nil, err
dec := json.NewDecoder(bytes.NewReader(podData))
for dec.More() {
var pod v1.Pod
if err := dec.Decode(&pod); err != nil {
return nil, err
}
pods.Add(&pod)
}
pods.Add(myPod)

// namespace also used in testing
namespaceData, err := os.ReadFile("../testdata/namespaces.jsonl")
if err != nil {
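As context for the decoder change in getMockInformers above: ../testdata/pods.jsonl now carries several newline-delimited pod objects, so the mock setup streams them with a json.Decoder loop instead of unmarshalling a single pod. A minimal, self-contained sketch of that pattern (the generic item struct and the inline data are illustrative stand-ins, not part of this PR):

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
)

// item stands in for v1.Pod; any struct with matching JSON fields works.
type item struct {
	Name string `json:"name"`
}

func main() {
	// Two newline-delimited JSON objects, as in a .jsonl fixture.
	data := []byte("{\"name\":\"pod-1\"}\n{\"name\":\"pod-2\"}\n")

	dec := json.NewDecoder(bytes.NewReader(data))
	// More reports whether another JSON value is available in the stream.
	for dec.More() {
		var it item
		if err := dec.Decode(&it); err != nil {
			panic(err)
		}
		fmt.Println(it.Name) // prints pod-1, then pod-2
	}
}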
36 changes: 35 additions & 1 deletion retrieval/k8s/k8s_stats.go
@@ -96,7 +96,9 @@ func writeK8sResourceFile(workDir *os.File, resourceName string,
	datawriter := bufio.NewWriter(file)

	for _, k8Resource := range resourceList {

		if shouldSkipResource(k8Resource) {
			continue
		}
		if parseMetricData {
			k8Resource = sanitizeData(k8Resource)
		}
@@ -124,6 +126,38 @@
	return err
}

//nolint:gocyclo
func shouldSkipResource(k8Resource interface{}) bool {
	// safe buffer to allow for longer lived resources to be ingested correctly
	previousHour := time.Now().Add(-1 * time.Hour)
	switch resource := k8Resource.(type) {
	case *v1batch.Job:
		if resource.Status.CompletionTime != nil &&
			previousHour.After(resource.Status.CompletionTime.Time) {
			return true
		}
		if resource.Status.Failed > 0 && resource.Status.StartTime != nil &&
			previousHour.After(resource.Status.StartTime.Time) {
			return true
		}
	case *corev1.Pod:
		if resource.Status.Phase == corev1.PodSucceeded || resource.Status.Phase == corev1.PodFailed {
			canSkip := true
			for _, v := range resource.Status.ContainerStatuses {
				if v.State.Terminated != nil && v.State.Terminated.FinishedAt.After(previousHour) {
					canSkip = false
				}
			}
Comment on lines +151 to +155

Reviewer (Contributor): Interested why we need this check, as Succeeded/Failed are defined below:
Succeeded: All containers in the Pod have terminated in success, and will not be restarted.
Failed: All containers in the Pod have terminated, and at least one container has terminated in failure. That is, the container either exited with non-zero status or was terminated by the system, and is not set for automatic restarting.

jdhudson3 (Contributor, Author) · Nov 6, 2024: My thought for keeping this data was that we may want information related to recently shut down pods, since we can detect that here, and we would want to evaluate the timestamp to ensure that we capture recent shutdowns.

We could remove all entries and not impact the current allocation methodology.
			return canSkip
		}
	case *v1apps.ReplicaSet:
		if resource.Status.Replicas == 0 && previousHour.After(resource.CreationTimestamp.Time) {
			return true
		}
	}
	return false
}

// nolint: gocyclo
func sanitizeData(to interface{}) interface{} {
	switch to.(type) {
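To make the shouldSkipResource cutoffs above concrete (and the Succeeded/Failed pod question in the review thread), here is a hedged sketch of a test that could sit next to k8s_stats.go. The package name and test scaffolding are assumptions; the one-hour buffer and the skip rules come from the diff:

package k8s

import (
	"testing"
	"time"

	v1batch "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func TestShouldSkipResourceSketch(t *testing.T) {
	twoHoursAgo := metav1.NewTime(time.Now().Add(-2 * time.Hour))
	tenMinutesAgo := metav1.NewTime(time.Now().Add(-10 * time.Minute))

	// A job that completed two hours ago is outside the one-hour buffer and should be skipped.
	oldJob := &v1batch.Job{Status: v1batch.JobStatus{CompletionTime: &twoHoursAgo}}
	if !shouldSkipResource(oldJob) {
		t.Error("expected job completed 2h ago to be filtered out")
	}

	// A job that completed ten minutes ago is still inside the buffer and should be kept.
	recentJob := &v1batch.Job{Status: v1batch.JobStatus{CompletionTime: &tenMinutesAgo}}
	if shouldSkipResource(recentJob) {
		t.Error("expected recently completed job to be kept")
	}

	// A succeeded pod whose container terminated recently is kept: the timestamp check
	// overrides the terminal phase, which is the behaviour discussed in the review thread.
	recentPod := &corev1.Pod{Status: corev1.PodStatus{
		Phase: corev1.PodSucceeded,
		ContainerStatuses: []corev1.ContainerStatus{{
			State: corev1.ContainerState{Terminated: &corev1.ContainerStateTerminated{FinishedAt: tenMinutesAgo}},
		}},
	}}
	if shouldSkipResource(recentPod) {
		t.Error("expected pod with a recently terminated container to be kept")
	}
}

An empty-ReplicaSet case could be exercised the same way by setting Status.Replicas to 0 and backdating CreationTimestamp past the one-hour buffer.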