Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove metrics logic from cmd/ansible-operator/main.go #3466

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions changelog/fragments/rm-ansible-metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# entries is a list of entries to include in
# release notes and/or the migration guide
entries:
- description: >
Remove legacy metrics generation code.
kind: "removal"
# Is this a breaking change?
breaking: true
migration:
header: Remove legacy metrics generation code from cmd/ansible-operator/main.go, and the servicemonitor checks from hack/tests/e2e-ansible.sh.
body: TBD
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit space

83 changes: 4 additions & 79 deletions cmd/ansible-operator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,14 @@ package main

import (
"context"
"errors"
"fmt"
"os"
"runtime"
"strconv"
"strings"

"github.com/spf13/pflag"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -44,19 +40,16 @@ import (
"github.com/operator-framework/operator-sdk/pkg/ansible/proxy/controllermap"
"github.com/operator-framework/operator-sdk/pkg/ansible/runner"
"github.com/operator-framework/operator-sdk/pkg/ansible/watches"
kubemetrics "github.com/operator-framework/operator-sdk/pkg/kube-metrics"
"github.com/operator-framework/operator-sdk/pkg/leader"
"github.com/operator-framework/operator-sdk/pkg/log/zap"
"github.com/operator-framework/operator-sdk/pkg/metrics"
sdkVersion "github.com/operator-framework/operator-sdk/version"
)

var (
metricsHost = "0.0.0.0"
log = logf.Log.WithName("cmd")
metricsPort int32 = 8383
operatorMetricsPort int32 = 8686
healthProbePort int32 = 6789
metricsHost = "0.0.0.0"
log = logf.Log.WithName("cmd")
metricsPort int32 = 8383
healthProbePort int32 = 6789
)

func printVersion() {
Expand Down Expand Up @@ -125,7 +118,6 @@ func main() {
os.Exit(1)
}

var gvks []schema.GroupVersionKind
cMap := controllermap.NewControllerMap()
watches, err := watches.Load(f.WatchesFile, f.MaxConcurrentReconciles, f.AnsibleVerbosity)
if err != nil {
Expand Down Expand Up @@ -159,7 +151,6 @@ func main() {
OwnerWatchMap: controllermap.NewWatchMap(),
AnnotationWatchMap: controllermap.NewWatchMap(),
}, w.Blacklist)
gvks = append(gvks, w.GroupVersionKind)
}

operatorName, err := k8sutil.GetOperatorName()
Expand All @@ -175,7 +166,6 @@ func main() {
os.Exit(1)
}

addMetrics(context.TODO(), cfg, gvks)
err = mgr.AddHealthzCheck("ping", healthz.Ping)
if err != nil {
log.Error(err, "Failed to add Healthz check.")
Expand Down Expand Up @@ -213,71 +203,6 @@ func main() {
log.Info("Exiting.")
}

// addMetrics creates the Service and ServiceMonitor objects that allow the
// operator to export its metrics through the Prometheus operator, and starts
// serving custom resource metrics for the given GVKs.
//
// Failures are logged and never returned to the caller; the function simply
// stops (or skips the failing step) when something goes wrong.
func addMetrics(ctx context.Context, cfg *rest.Config, gvks []schema.GroupVersionKind) {
	// Get the namespace the operator is currently deployed in.
	operatorNs, err := k8sutil.GetOperatorNamespace()
	if err != nil {
		if errors.Is(err, k8sutil.ErrRunLocal) {
			log.Info("Skipping CR metrics server creation; not running in a cluster.")
			return
		}
		// Any other failure leaves operatorNs empty. Report it and stop here
		// rather than silently continuing with an empty namespace.
		log.Error(err, "Failed to get operator namespace; skipping metrics setup.")
		return
	}

	if err := serveCRMetrics(cfg, operatorNs, gvks); err != nil {
		log.Info("Could not generate and serve custom resource metrics", "error", err.Error())
	}

	// Add to the below struct any other metrics ports you want to expose.
	servicePorts := []v1.ServicePort{
		{Port: metricsPort, Name: metrics.OperatorPortName, Protocol: v1.ProtocolTCP,
			TargetPort: intstr.IntOrString{Type: intstr.Int, IntVal: metricsPort}},
		{Port: operatorMetricsPort, Name: metrics.CRPortName, Protocol: v1.ProtocolTCP,
			TargetPort: intstr.IntOrString{Type: intstr.Int, IntVal: operatorMetricsPort}},
	}

	// Create Service object to expose the metrics port(s).
	service, err := metrics.CreateMetricsService(ctx, cfg, servicePorts)
	if err != nil {
		log.Info("Could not create metrics Service", "error", err.Error())
		return
	}

	// CreateServiceMonitors will automatically create the prometheus-operator ServiceMonitor resources
	// necessary to configure Prometheus to scrape metrics from this operator.
	services := []*v1.Service{service}

	// The ServiceMonitor is created in the same namespace where the operator is deployed.
	_, err = metrics.CreateServiceMonitors(cfg, operatorNs, services)
	if err != nil {
		log.Info("Could not create ServiceMonitor object", "error", err.Error())
		// If this operator is deployed to a cluster without the prometheus-operator running, it will return
		// ErrServiceMonitorNotPresent, which can be used to safely skip ServiceMonitor creation.
		// errors.Is also matches the sentinel when it has been wrapped.
		if errors.Is(err, metrics.ErrServiceMonitorNotPresent) {
			log.Info("Install prometheus-operator in your cluster to create ServiceMonitor objects", "error", err.Error())
		}
	}
}

// serveCRMetrics generates metrics for the custom resource types identified by
// gvks (the GVKs retrieved from watches) and serves them on
// "http://metricsHost:operatorMetricsPort".
//
// It returns the first error reported while resolving the namespaces or while
// generating and serving the metrics, and nil otherwise.
func serveCRMetrics(cfg *rest.Config, operatorNs string, gvks []schema.GroupVersionKind) error {
	// Resolve the namespaces the metrics are generated from.
	// NOTE that passing nil or an empty list of namespaces in GenerateAndServeCRMetrics will result in an error.
	namespaces, nsErr := kubemetrics.GetNamespacesForMetrics(operatorNs)
	if nsErr != nil {
		return nsErr
	}

	// Generate and serve custom resource specific metrics; its result is the
	// result of this function.
	return kubemetrics.GenerateAndServeCRMetrics(cfg, namespaces, gvks, metricsHost, operatorMetricsPort)
}

// getAnsibleDebugLog returns the value of ANSIBLE_DEBUG_LOGS in order to
// print the full Ansible logs
func getAnsibleDebugLog() bool {
Expand Down
60 changes: 24 additions & 36 deletions hack/tests/e2e-ansible.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,37 +59,26 @@ test_operator() {
exit 1
fi

header_text "verify that metrics service was created"
if ! timeout 60s bash -c -- "until kubectl get service/memcached-operator-metrics > /dev/null 2>&1; do sleep 1; done";
then
error_text "FAIL: Failed to get metrics service"
operator_logs
exit 1
fi
# TODO @asmacdo to uncomment once new kb layout is merged.

header_text "verify that the metrics endpoint exists (Port 8383)"
if ! timeout 1m bash -c -- "until kubectl run --attach --rm --restart=Never test-metrics --image=$metrics_test_image -- curl -sfo /dev/null http://memcached-operator-metrics:8383/metrics; do sleep 1; done";
then
error_text "FAIL: Failed to verify that metrics endpoint exists"
operator_logs
exit 1
fi
# header_text "verify that metrics service was created"
# if ! timeout 60s bash -c -- "until kubectl get service/memcached-operator-metrics > /dev/null 2>&1; do sleep 1; done";
# then
# error_text "FAIL: Failed to get metrics service"
# operator_logs
# exit 1
# fi

header_text "verify that the metrics endpoint exists (Port 8686)"
if ! timeout 1m bash -c -- "until kubectl run --attach --rm --restart=Never test-metrics --image=$metrics_test_image -- curl -sfo /dev/null http://memcached-operator-metrics:8686/metrics; do sleep 1; done";
then
error_text "FAIL: Failed to verify that metrics endpoint exists"
operator_logs
exit 1
fi
# TODO Add --metrics-addr flag to the ansible operator and default it to 8080.

# header_text "verify that the metrics endpoint exists (Port 8383)"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a TODO here to add a --metrics-addr flag to the ansible operator and default it to 8080.

See #3440

Once we have an equivalent PR merged for ansible, we'll need to update this port to be 8080.

# if ! timeout 1m bash -c -- "until kubectl run --attach --rm --restart=Never test-metrics --image=$metrics_test_image -- curl -sfo /dev/null http://memcached-operator-metrics:8383/metrics; do sleep 1; done";
# then
# error_text "FAIL: Failed to verify that metrics endpoint exists"
# operator_logs
# exit 1
# fi

header_text "verify that the servicemonitor is created"
if ! timeout 1m bash -c -- "until kubectl get servicemonitors/memcached-operator-metrics > /dev/null 2>&1; do sleep 1; done";
then
error_text "FAIL: Failed to get service monitor"
operator_logs
exit 1
fi

header_text "create custom resource (Memcached CR)"
kubectl create -f deploy/crds/ansible.example.com_v1alpha1_memcached_cr.yaml
Expand Down Expand Up @@ -136,13 +125,13 @@ test_operator() {
fi


header_text "verify that metrics reflect cr creation"
if ! timeout 60s bash -c -- "until kubectl run --attach --rm --restart=Never test-metrics --image=$metrics_test_image -- curl -sf http://memcached-operator-metrics:8686/metrics | grep example-memcached; do sleep 1; done";
then
error_text "FAIL: Failed to verify custom resource metrics"
operator_logs
exit 1
fi
# header_text "verify that metrics reflect cr creation"
# if ! timeout 60s bash -c -- "until kubectl run --attach --rm --restart=Never test-metrics --image=$metrics_test_image -- curl -sf http://memcached-operator-metrics:8383/metrics | grep example-memcached; do sleep 1; done";
# then
# error_text "FAIL: Failed to verify custom resource metrics"
# operator_logs
# exit 1
# fi

header_text "get memcached deploy by labels"
memcached_deployment=$(kubectl get deployment -l app=memcached -o jsonpath="{..metadata.name}")
Expand Down Expand Up @@ -197,7 +186,6 @@ cat "$ROOTDIR/test/ansible-memcached/watches-finalizer.yaml" >> memcached-operat
# Append Foo kind to watches to test watching multiple Kinds
cat "$ROOTDIR/test/ansible-memcached/watches-foo-kind.yaml" >> memcached-operator/watches.yaml

install_service_monitor_crd

pushd memcached-operator

Expand Down
28 changes: 0 additions & 28 deletions internal/scaffold/role.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,32 +326,4 @@ rules:
{{- end }}
{{- end }}
{{- end }}
- apiGroups:
- monitoring.coreos.com
resources:
- servicemonitors
verbs:
- "get"
- "create"
- apiGroups:
- apps
resources:
- deployments/finalizers
resourceNames:
- {{ .ProjectName }}
verbs:
- "update"
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- apps
resources:
- replicasets
- deployments
verbs:
- get
`
84 changes: 0 additions & 84 deletions internal/scaffold/role_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,34 +120,6 @@ rules:
- patch
- update
- watch
- apiGroups:
- monitoring.coreos.com
resources:
- servicemonitors
verbs:
- "get"
- "create"
- apiGroups:
- apps
resources:
- deployments/finalizers
resourceNames:
- app-operator
verbs:
- "update"
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- apps
resources:
- replicasets
- deployments
verbs:
- get
`

const clusterroleExp = `kind: ClusterRole
Expand Down Expand Up @@ -189,34 +161,6 @@ rules:
- patch
- update
- watch
- apiGroups:
- monitoring.coreos.com
resources:
- servicemonitors
verbs:
- "get"
- "create"
- apiGroups:
- apps
resources:
- deployments/finalizers
resourceNames:
- app-operator
verbs:
- "update"
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- apps
resources:
- replicasets
- deployments
verbs:
- get
`

const roleCustomRulesExp = `kind: Role
Expand Down Expand Up @@ -245,32 +189,4 @@ rules:
resources:
- "roles"
- "rolebindings"
- apiGroups:
- monitoring.coreos.com
resources:
- servicemonitors
verbs:
- "get"
- "create"
- apiGroups:
- apps
resources:
- deployments/finalizers
resourceNames:
- app-operator
verbs:
- "update"
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- apiGroups:
- apps
resources:
- replicasets
- deployments
verbs:
- get
`
4 changes: 0 additions & 4 deletions test/ansible/molecule/cluster/tasks/servicemonitor_test.yml

This file was deleted.