diff --git a/cmd/dequeuer/main.go b/cmd/dequeuer/main.go
index cb9e0a5685..85dad6086c 100644
--- a/cmd/dequeuer/main.go
+++ b/cmd/dequeuer/main.go
@@ -47,7 +47,7 @@ func main() {
 		userContainerPort int
 		apiName           string
 		jobID             string
-		statsdPort        int
+		statsdAddress     string
 		apiKind           string
 		adminPort         int
 	)
@@ -58,8 +58,8 @@ func main() {
 	flag.StringVar(&apiKind, "api-kind", "", fmt.Sprintf("api kind (%s|%s)", userconfig.BatchAPIKind.String(), userconfig.AsyncAPIKind.String()))
 	flag.StringVar(&apiName, "api-name", "", "api name")
 	flag.StringVar(&jobID, "job-id", "", "job ID")
+	flag.StringVar(&statsdAddress, "statsd-address", "", "address to push statsd metrics")
 	flag.IntVar(&userContainerPort, "user-port", 8080, "target port to which the dequeued messages will be sent to")
-	flag.IntVar(&statsdPort, "statsd-port", 9125, "port for to send udp statsd metrics")
 	flag.IntVar(&adminPort, "admin-port", 0, "port where the admin server (for the probes) will be exposed")
 	flag.Parse()
 
@@ -69,8 +69,6 @@ func main() {
 		version = consts.CortexVersion
 	}
 
-	hostIP := os.Getenv("HOST_IP")
-
 	log := logging.GetLogger()
 	defer func() {
 		_ = log.Sync()
@@ -158,7 +156,7 @@ func main() {
 		TargetURL: targetURL,
 	}
 
-	metricsClient, err := statsd.New(fmt.Sprintf("%s:%d", hostIP, statsdPort))
+	metricsClient, err := statsd.New(statsdAddress)
 	if err != nil {
 		exit(log, err, "unable to initialize metrics client")
 	}
diff --git a/manager/manifests/prometheus-statsd-exporter.yaml b/manager/manifests/prometheus-statsd-exporter.yaml
index eade7089cb..b96a700ea6 100644
--- a/manager/manifests/prometheus-statsd-exporter.yaml
+++ b/manager/manifests/prometheus-statsd-exporter.yaml
@@ -23,17 +23,13 @@ data:
         observer_type: histogram
 
 ---
-
 apiVersion: apps/v1
-kind: DaemonSet
+kind: Deployment
 metadata:
   name: prometheus-statsd-exporter
   namespace: default
 spec:
-  updateStrategy:
-    type: RollingUpdate
-    rollingUpdate:
-      maxUnavailable: 1
+  replicas: 1
   selector:
     matchLabels:
       name: prometheus-statsd-exporter
@@ -62,7 +58,6 @@ spec:
               protocol: TCP
             - name: statsd-udp
               containerPort: 9125
-              hostPort: 9125
               protocol: UDP
           livenessProbe:
             httpGet:
@@ -86,16 +81,23 @@ spec:
             items:
               - key: statsd-mapping.yaml
                 path: statsd-mapping.yaml
-      nodeSelector:
-        workload: "true"
       terminationGracePeriodSeconds: 60
-      tolerations:
-        - key: aws.amazon.com/neuron
-          operator: Exists
-          effect: NoSchedule
-        - key: nvidia.com/gpu
-          operator: Exists
-          effect: NoSchedule
-        - key: workload
-          operator: Exists
-          effect: NoSchedule
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  namespace: default
+  name: prometheus-statsd-exporter
+  labels:
+    cortex.dev/name: prometheus-statsd-exporter
+spec:
+  selector:
+    name: prometheus-statsd-exporter
+  ports:
+    - port: 9125
+      name: statsd-udp
+      protocol: UDP
+    - port: 9102
+      name: metrics
+      protocol: TCP
diff --git a/pkg/consts/consts.go b/pkg/consts/consts.go
index 1d1471fe76..b1989fd093 100644
--- a/pkg/consts/consts.go
+++ b/pkg/consts/consts.go
@@ -35,10 +35,7 @@ var (
 	AdminPortName  = "admin"
 	AdminPortStr   = "15000"
 	AdminPortInt32 = int32(15000)
-
-	StatsDPortStr = "9125"
-
-	AuthHeader = "X-Cortex-Authorization"
+	AuthHeader = "X-Cortex-Authorization"
 
 	DefaultInClusterConfigPath = "/configs/cluster/cluster.yaml"
 	MaxBucketLifecycleRules    = 100
diff --git a/pkg/operator/resources/validations.go b/pkg/operator/resources/validations.go
index 3a01f3b7e8..ebbc5514f6 100644
--- a/pkg/operator/resources/validations.go
+++ b/pkg/operator/resources/validations.go
@@ -106,23 +106,21 @@ func ValidateClusterAPIs(apis []userconfig.API) error {
 CPU Reservations:
 
 FluentBit 100
-StatsDExporter 100
 NodeExporter 110 (it has two containers)
 KubeProxy 100
 AWS cni 10
 Reserved (150 + 150) see eks.yaml for details
 */
-var _cortexCPUReserve = kresource.MustParse("720m")
+var _cortexCPUReserve = kresource.MustParse("620m")
 
 /*
 Memory Reservations:
 
 FluentBit 150
-StatsDExporter 100
 NodeExporter 200 (it has two containers)
 Reserved (300 + 300 + 200) see eks.yaml for details
 */
-var _cortexMemReserve = kresource.MustParse("1250Mi")
+var _cortexMemReserve = kresource.MustParse("1150Mi")
 
 var _nvidiaCPUReserve = kresource.MustParse("100m")
 var _nvidiaMemReserve = kresource.MustParse("100Mi")
diff --git a/pkg/workloads/k8s.go b/pkg/workloads/k8s.go
index 6519d29012..172221efe5 100644
--- a/pkg/workloads/k8s.go
+++ b/pkg/workloads/k8s.go
@@ -62,6 +62,8 @@ const (
 	_clusterConfigDirVolume = "cluster-config"
 	_clusterConfigConfigMap = "cluster-config"
 	_clusterConfigDir       = "/configs/cluster"
+
+	_statsdAddress = "prometheus-statsd-exporter.default:9125"
 )
 
 var (
@@ -128,18 +130,11 @@ func asyncDequeuerProxyContainer(api spec.API, queueURL string) (kcore.Container
 			"--queue", queueURL,
 			"--api-kind", api.Kind.String(),
 			"--api-name", api.Name,
+			"--statsd-address", _statsdAddress,
 			"--user-port", s.Int32(*api.Pod.Port),
-			"--statsd-port", consts.StatsDPortStr,
 			"--admin-port", consts.AdminPortStr,
 		},
-		Env: append(baseEnvVars, kcore.EnvVar{
-			Name: "HOST_IP",
-			ValueFrom: &kcore.EnvVarSource{
-				FieldRef: &kcore.ObjectFieldSelector{
-					FieldPath: "status.hostIP",
-				},
-			},
-		}),
+		Env: baseEnvVars,
 		Ports: []kcore.ContainerPort{
 			{
 				Name: consts.AdminPortName,
@@ -181,18 +176,11 @@ func batchDequeuerProxyContainer(api spec.API, jobID, queueURL string) (kcore.Co
 			"--api-kind", api.Kind.String(),
 			"--api-name", api.Name,
 			"--job-id", jobID,
+			"--statsd-address", _statsdAddress,
 			"--user-port", s.Int32(*api.Pod.Port),
-			"--statsd-port", consts.StatsDPortStr,
 			"--admin-port", consts.AdminPortStr,
 		},
-		Env: append(baseEnvVars, kcore.EnvVar{
-			Name: "HOST_IP",
-			ValueFrom: &kcore.EnvVarSource{
-				FieldRef: &kcore.ObjectFieldSelector{
-					FieldPath: "status.hostIP",
-				},
-			},
-		}),
+		Env: baseEnvVars,
 		ReadinessProbe: &kcore.Probe{
 			Handler: kcore.Handler{
 				HTTPGet: &kcore.HTTPGetAction{
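
For context, a minimal sketch of what the dequeuer-side change amounts to, assuming the DataDog `statsd` Go client that `cmd/dequeuer/main.go` imports; the default address shown and the metric name are illustrative only, not part of this diff:

```go
package main

import (
	"flag"
	"log"

	"github.com/DataDog/datadog-go/statsd"
)

func main() {
	// The dequeuer now receives the full UDP address of the statsd sink
	// (the in-cluster prometheus-statsd-exporter Service) instead of
	// deriving it from the node's HOST_IP plus a fixed --statsd-port.
	statsdAddress := flag.String("statsd-address",
		"prometheus-statsd-exporter.default:9125", "address to push statsd metrics")
	flag.Parse()

	// statsd.New accepts a "host:port" string; the Service DNS name resolves
	// to the exporter regardless of which node the pod is scheduled on.
	metricsClient, err := statsd.New(*statsdAddress)
	if err != nil {
		log.Fatalf("unable to initialize metrics client: %v", err)
	}
	defer metricsClient.Close()

	// Hypothetical metric, just to show the client in use.
	if err := metricsClient.Incr("dequeuer.example_event", nil, 1.0); err != nil {
		log.Printf("failed to push metric: %v", err)
	}
}
```

Addressing the exporter through the `prometheus-statsd-exporter.default:9125` Service is what allows it to run as a single-replica Deployment instead of a per-node DaemonSet with a `hostPort`, which in turn frees the per-node CPU and memory reservations reduced in `validations.go`.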