From d1557bc283dd82c3bd6f8ad5108d6b49831b2626 Mon Sep 17 00:00:00 2001 From: Alex Vulaj Date: Tue, 4 Jun 2024 13:38:37 -0400 Subject: [PATCH] Don't inject must-gather fields directly into job yaml (#138) * Don't inject must-gather fields directly into job yaml * Remove yaml template entirely --- README.md | 1 - build/Dockerfile | 7 +- build/templates/job.template.yaml | 107 ---------- .../mustgather/mustgather_controller.go | 63 ++---- .../mustgather/mustgather_controller_test.go | 3 - controllers/mustgather/template.go | 199 ++++++++++++++++++ 6 files changed, 215 insertions(+), 165 deletions(-) delete mode 100644 build/templates/job.template.yaml create mode 100644 controllers/mustgather/template.go diff --git a/README.md b/README.md index d21bcab9..c03f0e05 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,5 @@ Using the [operator-sdk](https://github.com/operator-framework/operator-sdk), ru oc apply -f deploy/crds/managed.openshift.io_mustgathers_crd.yaml oc new-project must-gather-operator export DEFAULT_MUST_GATHER_IMAGE='quay.io/openshift/origin-must-gather:latest' -export JOB_TEMPLATE_FILE_NAME=./build/templates/job.template.yaml OPERATOR_NAME=must-gather-operator operator-sdk run --verbose --local --namespace '' ``` diff --git a/build/Dockerfile b/build/Dockerfile index 21abb112..9aee5e63 100644 --- a/build/Dockerfile +++ b/build/Dockerfile @@ -2,8 +2,7 @@ FROM quay.io/app-sre/boilerplate:image-v5.0.0 AS builder ENV OPERATOR=/usr/local/bin/must-gather-operator \ OPERATOR_BIN=must-gather-operator \ USER_UID=1001 \ - USER_NAME=must-gather-operator \ - JOB_TEMPLATE_FILE_NAME=/etc/templates/job.template.yaml + USER_NAME=must-gather-operator RUN mkdir /src @@ -21,8 +20,7 @@ FROM registry.access.redhat.com/ubi8/ubi-minimal:8.10-896.1716497715 ENV OPERATOR=/usr/local/bin/must-gather-operator \ OPERATOR_BIN=must-gather-operator \ USER_UID=1001 \ - USER_NAME=must-gather-operator \ - JOB_TEMPLATE_FILE_NAME=/etc/templates/job.template.yaml + USER_NAME=must-gather-operator RUN microdnf install tar gzip openssh-clients wget shadow-utils procps && \ wget https://kojipkgs.fedoraproject.org/packages/sshpass/1.06/9.el8/x86_64/sshpass-1.06-9.el8.x86_64.rpm && \ @@ -32,7 +30,6 @@ RUN microdnf install tar gzip openssh-clients wget shadow-utils procps && \ COPY --from=builder /src/build/_output/bin/${OPERATOR_BIN} /usr/local/bin/${OPERATOR_BIN} COPY --from=builder /src/build/bin /usr/local/bin -COPY --from=builder /src/build/templates /etc/templates RUN /usr/local/bin/user_setup diff --git a/build/templates/job.template.yaml b/build/templates/job.template.yaml deleted file mode 100644 index 66f56134..00000000 --- a/build/templates/job.template.yaml +++ /dev/null @@ -1,107 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: {{ .ObjectMeta.Name }} - namespace: {{ .ObjectMeta.Namespace }} -spec: - template: - spec: - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - preference: - matchExpressions: - - key: node-role.kubernetes.io/infra - operator: Exists - weight: 1 - tolerations: - - effect: NoSchedule - key: node-role.kubernetes.io/infra - operator: Exists - restartPolicy: OnFailure - shareProcessNamespace: true - containers: -{{- $audit := .Spec.Audit}} -{{- $timeout := .Spec.MustGatherTimeout.Duration}} - - command: - - /bin/bash - - -c - - | -{{ if $audit }} - timeout {{ $timeout }} bash -x -c -- '/usr/bin/gather_audit_logs' -{{ else }} - timeout {{ $timeout }} bash -x -c -- '/usr/bin/gather' -{{ end }} - status=$? - if [[ $status -eq 124 || $status -eq 137 ]]; then - echo "Gather timed out." - exit 0 - fi - image: quay.io/openshift/origin-must-gather:MUST_GATHER_IMAGE_DONT_CHANGE - name: gather - volumeMounts: - - mountPath: /must-gather - name: must-gather-output - - command: - - /bin/bash - - -c - - | - count=0 - until [ $count -gt 4 ] - do - while `pgrep -a gather > /dev/null` - do - echo "waiting for gathers to complete ..." - sleep 120 - count=0 - done - echo "no gather is running ($count / 4)" - ((count++)) - sleep 30 - done - /usr/local/bin/upload - image: THIS_STRING_WILL_BE_REPLACED_BUT_DONT_CHANGE_IT - name: upload - volumeMounts: - - mountPath: /must-gather - name: must-gather-output - - mountPath: /must-gather-upload - name: must-gather-upload - env: - {{ if .Spec.ProxyConfig.HTTPProxy -}} - - name: http_proxy - value: {{ .Spec.ProxyConfig.HTTPProxy }} - {{ end -}} - {{ if .Spec.ProxyConfig.HTTPSProxy -}} - - name: https_proxy - value: {{ .Spec.ProxyConfig.HTTPSProxy }} - {{ end -}} - {{ if .Spec.ProxyConfig.NoProxy -}} - - name: no_proxy - value: {{ .Spec.ProxyConfig.NoProxy }} - {{ end -}} - - name: username - valueFrom: - secretKeyRef: - name: {{ .Spec.CaseManagementAccountSecretRef.Name }} - key: username - - name: password - valueFrom: - secretKeyRef: - name: {{ .Spec.CaseManagementAccountSecretRef.Name }} - key: password - - name: caseid - value: "{{ .Spec.CaseID }}" - - name: must_gather_output - value: /must-gather - - name: must_gather_upload - value: /must-gather-upload - - name: internal_user - value: "{{ .Spec.InternalUser }}" - serviceAccountName: {{ .Spec.ServiceAccountRef.Name }} - volumes: - - emptyDir: {} - name: must-gather-output - - emptyDir: {} - name: must-gather-upload - diff --git a/controllers/mustgather/mustgather_controller.go b/controllers/mustgather/mustgather_controller.go index 5f4d3f99..d0f32325 100644 --- a/controllers/mustgather/mustgather_controller.go +++ b/controllers/mustgather/mustgather_controller.go @@ -27,11 +27,9 @@ import ( "github.com/openshift/must-gather-operator/pkg/k8sutil" "github.com/openshift/must-gather-operator/pkg/localmetrics" "github.com/redhat-cop/operator-utils/pkg/util" - "github.com/redhat-cop/operator-utils/pkg/util/templates" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/types" "os" "reflect" @@ -40,47 +38,16 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" - "strings" - "text/template" ) -const ControllerName = "mustgather-controller" +const ( + ControllerName = "mustgather-controller" -const templateFileNameEnv = "JOB_TEMPLATE_FILE_NAME" -const defaultMustGatherNamespace = "openshift-must-gather-operator" + defaultMustGatherNamespace = "openshift-must-gather-operator" +) var log = logf.Log.WithName(ControllerName) -var jobTemplate *template.Template - -func initializeTemplate(clusterVersion string) (*template.Template, error) { - templateFileName, ok := os.LookupEnv(templateFileNameEnv) - if !ok { - templateFileName = "/etc/templates/job.template.yaml" - } - text, err := os.ReadFile(templateFileName) - if err != nil { - log.Error(err, "Error reading job template file", "filename", templateFileName) - return &template.Template{}, err - } - // Inject the operator image URI from the pod's env variables - operatorImage, varPresent := os.LookupEnv("OPERATOR_IMAGE") - if !varPresent { - err := goerror.New("Operator image environment variable not found") - log.Error(err, "Error: no operator image found for job template") - return &template.Template{}, err - } - // TODO: make these normal template parameters instead. This is ugly but works - str := strings.Replace(string(text), "THIS_STRING_WILL_BE_REPLACED_BUT_DONT_CHANGE_IT", operatorImage, 1) - str = strings.Replace(str, "MUST_GATHER_IMAGE_DONT_CHANGE", clusterVersion, 1) - jobTemplate, err := template.New("MustGatherJob").Parse(str) - if err != nil { - log.Error(err, "Error parsing template", "template", str) - return &template.Template{}, err - } - return jobTemplate, err -} - // blank assignment to verify that MustGatherReconciler implements reconcile.Reconciler var _ reconcile.Reconciler = &MustGatherReconciler{} @@ -367,23 +334,21 @@ func (r *MustGatherReconciler) addFinalizer(reqLogger logr.Logger, m *mustgather return nil } -func (r *MustGatherReconciler) getJobFromInstance(instance *mustgatherv1alpha1.MustGather) (*unstructured.Unstructured, error) { +func (r *MustGatherReconciler) getJobFromInstance(instance *mustgatherv1alpha1.MustGather) (*batchv1.Job, error) { + // Inject the operator image URI from the pod's env variables + operatorImage, varPresent := os.LookupEnv("OPERATOR_IMAGE") + if !varPresent { + err := goerror.New("Operator image environment variable not found") + log.Error(err, "Error: no operator image found for job template") + return nil, err + } + version, err := r.getClusterVersionForJobTemplate("version") if err != nil { return nil, fmt.Errorf("failed to get cluster version for job template: %w", err) } - jobTemplate, err = initializeTemplate(version) - if err != nil { - log.Error(err, "unable to initialize job template") - return &unstructured.Unstructured{}, err - } - unstructuredJob, err := templates.ProcessTemplate(context.TODO(), instance, jobTemplate) - if err != nil { - log.Error(err, "unable to process", "template", jobTemplate, "with parameter", instance) - return &unstructured.Unstructured{}, err - } - return unstructuredJob, nil + return getJobTemplate(operatorImage, version, *instance), nil } func (r *MustGatherReconciler) getClusterVersionForJobTemplate(clusterVersionName string) (string, error) { diff --git a/controllers/mustgather/mustgather_controller_test.go b/controllers/mustgather/mustgather_controller_test.go index f7585421..2dd1ea15 100644 --- a/controllers/mustgather/mustgather_controller_test.go +++ b/controllers/mustgather/mustgather_controller_test.go @@ -3,7 +3,6 @@ package mustgather import ( "context" configv1 "github.com/openshift/api/config/v1" - "os" "sigs.k8s.io/controller-runtime/pkg/client" "testing" @@ -24,8 +23,6 @@ import ( ) func TestMustGatherController(t *testing.T) { - os.Setenv("JOB_TEMPLATE_FILE_NAME", "../../../build/templates/job.template.yaml") - mgObj := createMustGatherObject() secObj := createMustGatherSecretObject() diff --git a/controllers/mustgather/template.go b/controllers/mustgather/template.go new file mode 100644 index 00000000..e473944a --- /dev/null +++ b/controllers/mustgather/template.go @@ -0,0 +1,199 @@ +package mustgather + +import ( + "fmt" + "github.com/openshift/must-gather-operator/api/v1alpha1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "strconv" + "time" +) + +const ( + infraNodeLabelKey = "node-role.kubernetes.io/infra" + outputVolumeName = "must-gather-output" + uploadVolumeName = "must-gather-upload" + volumeMountPath = "/must-gather" + volumeUploadMountPath = "/must-gather-upload" + + gatherCommandBinaryAudit = "gather_audit_logs" + gatherCommandBinaryNoAudit = "gather" + gatherCommand = "\ntimeout %v bash -x -c -- '/usr/bin/%v'\n\nstatus=$?\nif [[ $status -eq 124 || $status -eq 137 ]]; then\n echo \"Gather timed out.\"\n exit 0\nfi" + mustGatherImage = "quay.io/openshift/origin-must-gather" + gatherContainerName = "gather" + + uploadContainerName = "upload" + uploadEnvUsername = "username" + uploadEnvPassword = "password" + uploadEnvCaseId = "caseid" + uploadEnvInternalUser = "internal_user" + uploadEnvHttpProxy = "http_proxy" + uploadEnvHttpsProxy = "https_proxy" + uploadEnvNoProxy = "no_proxy" + uploadCommand = "count=0\nuntil [ $count -gt 4 ]\ndo\n while `pgrep -a gather > /dev/null`\n do\n echo \"waiting for gathers to complete ...\" \n sleep 120\n count=0\n done\n echo \"no gather is running ($count / 4)\"\n ((count++))\n sleep 30\ndone\n/usr/local/bin/upload\n" +) + +func getJobTemplate(operatorImage string, clusterVersion string, mustGather v1alpha1.MustGather) *batchv1.Job { + job := initializeJobTemplate(mustGather.Name, mustGather.Namespace, mustGather.Spec.ServiceAccountRef.Name) + + job.Spec.Template.Spec.Containers = append( + job.Spec.Template.Spec.Containers, + getGatherContainer(mustGather.Spec.Audit, mustGather.Spec.MustGatherTimeout.Duration, clusterVersion), + getUploadContainer( + operatorImage, + mustGather.Spec.CaseID, + mustGather.Spec.InternalUser, + mustGather.Spec.ProxyConfig.HTTPProxy, + mustGather.Spec.ProxyConfig.HTTPSProxy, + mustGather.Spec.ProxyConfig.NoProxy, + ), + ) + return job +} + +func initializeJobTemplate(name string, namespace string, serviceAccountRef string) *batchv1.Job { + return &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{ + { + Preference: corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: infraNodeLabelKey, + Operator: corev1.NodeSelectorOpExists, + }, + }, + }, + Weight: 1, + }, + }, + }, + }, + Tolerations: []corev1.Toleration{ + { + Effect: corev1.TaintEffectNoSchedule, + Key: infraNodeLabelKey, + Operator: corev1.TolerationOpExists, + }, + }, + RestartPolicy: corev1.RestartPolicyOnFailure, + ShareProcessNamespace: ToPtr(true), + Volumes: []corev1.Volume{ + { + Name: outputVolumeName, + VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}}, + }, + { + Name: uploadVolumeName, + VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}}, + }, + }, + ServiceAccountName: serviceAccountRef, + }, + }, + }, + } +} + +func getGatherContainer(audit bool, timeout time.Duration, mustGatherImageVersion string) corev1.Container { + var commandBinary string + if audit { + commandBinary = gatherCommandBinaryAudit + } else { + commandBinary = gatherCommandBinaryNoAudit + } + + return corev1.Container{ + Command: []string{ + "/bin/bash", + "-c", + fmt.Sprintf(gatherCommand, timeout, commandBinary), + }, + Image: fmt.Sprintf("%v:%v", mustGatherImage, mustGatherImageVersion), + Name: gatherContainerName, + VolumeMounts: []corev1.VolumeMount{ + { + MountPath: volumeMountPath, + Name: outputVolumeName, + }, + }, + } +} + +func getUploadContainer( + operatorImage string, + caseId string, + internalUser bool, + httpProxy string, + httpsProxy string, + noProxy string, +) corev1.Container { + container := corev1.Container{ + Command: []string{ + "/bin/bash", + "-c", + uploadCommand, + }, + Image: operatorImage, + Name: uploadContainerName, + VolumeMounts: []corev1.VolumeMount{ + { + MountPath: volumeMountPath, + Name: outputVolumeName, + }, + { + MountPath: volumeUploadMountPath, + Name: uploadVolumeName, + }, + }, + Env: []corev1.EnvVar{ + { + Name: uploadEnvUsername, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + Key: uploadEnvUsername, + }, + }, + }, + { + Name: uploadEnvPassword, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + Key: uploadEnvPassword, + }, + }, + }, + { + Name: uploadEnvCaseId, + Value: caseId, + }, + { + Name: uploadEnvInternalUser, + Value: strconv.FormatBool(internalUser), + }, + }, + } + + if httpProxy != "" { + container.Env = append(container.Env, corev1.EnvVar{Name: uploadEnvHttpProxy, Value: httpProxy}) + } + if httpsProxy != "" { + container.Env = append(container.Env, corev1.EnvVar{Name: uploadEnvHttpsProxy, Value: httpsProxy}) + } + if noProxy != "" { + container.Env = append(container.Env, corev1.EnvVar{Name: uploadEnvNoProxy, Value: noProxy}) + } + + return container +} + +func ToPtr[T any](t T) *T { return &t }