Merge pull request #3885 from aledbf/status
Refactor status update
k8s-ci-robot authored Mar 13, 2019
2 parents ce2b4b1 + f4e4335 commit e079365
Showing 8 changed files with 260 additions and 127 deletions.
6 changes: 5 additions & 1 deletion internal/ingress/controller/controller.go
@@ -188,7 +188,11 @@ func (n *NGINXController) syncIngress(interface{}) error {
klog.Infof("Backend successfully reloaded.")
n.metricCollector.ConfigSuccess(hash, true)
n.metricCollector.IncReloadCount()
n.metricCollector.SetSSLExpireTime(servers)

if n.isLeader() {
klog.V(2).Infof("Updating ssl expiration metrics.")
n.metricCollector.SetSSLExpireTime(servers)
}
}

isFirstSync := n.runningConfig.Equal(&ingress.Configuration{})
55 changes: 48 additions & 7 deletions internal/ingress/controller/nginx.go
@@ -31,6 +31,7 @@ import (
"strconv"
"strings"
"sync"
"sync/atomic"
"syscall"
"text/template"
"time"
@@ -115,6 +116,7 @@ func NewNGINXController(config *Configuration, mc metric.Collector, fs file.File
if err != nil {
klog.Fatalf("unexpected error obtaining pod information: %v", err)
}
n.podInfo = pod

n.store = store.New(
config.EnableSSLChainCompletion,
@@ -132,15 +134,13 @@
config.DisableCatchAll)

n.syncQueue = task.NewTaskQueue(n.syncIngress)

if config.UpdateStatus {
n.syncStatus = status.NewStatusSyncer(status.Config{
n.syncStatus = status.NewStatusSyncer(pod, status.Config{
Client: config.Client,
PublishService: config.PublishService,
PublishStatusAddress: config.PublishStatusAddress,
IngressLister: n.store,
ElectionID: config.ElectionID,
IngressClass: class.IngressClass,
DefaultIngressClass: class.DefaultClass,
UpdateStatusOnShutdown: config.UpdateStatusOnShutdown,
UseNodeInternalIP: config.UseNodeInternalIP,
})
@@ -215,13 +215,15 @@ Error loading new template: %v

// NGINXController describes a NGINX Ingress controller.
type NGINXController struct {
podInfo *k8s.PodInfo

cfg *Configuration

recorder record.EventRecorder

syncQueue *task.Queue

syncStatus status.Sync
syncStatus status.Syncer

syncRateLimiter flowcontrol.RateLimiter

@@ -254,6 +256,8 @@ type NGINXController struct {
fileSystem filesystem.Filesystem

metricCollector metric.Collector

currentLeader uint32
}

// Start starts a new NGINX master process running in the foreground.
@@ -262,10 +266,35 @@ func (n *NGINXController) Start() {

n.store.Run(n.stopCh)

if n.syncStatus != nil {
go n.syncStatus.Run()
// we need to use the defined ingress class to allow multiple leaders
// in order to update information about ingress status
electionID := fmt.Sprintf("%v-%v", n.cfg.ElectionID, class.DefaultClass)
if class.IngressClass != "" {
electionID = fmt.Sprintf("%v-%v", n.cfg.ElectionID, class.IngressClass)
}

setupLeaderElection(&leaderElectionConfig{
Client: n.cfg.Client,
ElectionID: electionID,
OnStartedLeading: func(stopCh chan struct{}) {
if n.syncStatus != nil {
go n.syncStatus.Run(stopCh)
}

n.setLeader(true)
n.metricCollector.OnStartedLeading(electionID)
// manually update SSL expiration metrics
// (to not wait for a reload)
n.metricCollector.SetSSLExpireTime(n.runningConfig.Servers)
},
OnStoppedLeading: func() {
n.setLeader(false)
n.metricCollector.OnStoppedLeading(electionID)
},
PodName: n.podInfo.Name,
PodNamespace: n.podInfo.Namespace,
})

cmd := nginxExecCommand()

// put NGINX in another process group to prevent it
@@ -1099,3 +1128,15 @@ func buildRedirects(servers []*ingress.Server) []*redirect {

return redirectServers
}

func (n *NGINXController) setLeader(leader bool) {
var i uint32
if leader {
i = 1
}
atomic.StoreUint32(&n.currentLeader, i)
}

func (n *NGINXController) isLeader() bool {
return atomic.LoadUint32(&n.currentLeader) != 0
}
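
The new leader flag above is a plain uint32 flipped through sync/atomic, so the election callbacks can write it while the sync loop reads it without taking a lock (the commit predates Go 1.19's atomic.Bool). A minimal standalone sketch of the same pattern, not part of this commit, with illustrative names:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// leaderFlag mirrors the currentLeader field: a uint32 written with
// atomic.StoreUint32 and read with atomic.LoadUint32 so concurrent
// readers never race with the writer.
type leaderFlag struct{ val uint32 }

func (f *leaderFlag) set(leader bool) {
	var i uint32
	if leader {
		i = 1
	}
	atomic.StoreUint32(&f.val, i)
}

func (f *leaderFlag) get() bool { return atomic.LoadUint32(&f.val) != 0 }

func main() {
	var f leaderFlag
	var wg sync.WaitGroup

	wg.Add(2)
	go func() { // simulates the OnStartedLeading / OnStoppedLeading callbacks
		defer wg.Done()
		f.set(true)
		f.set(false)
	}()
	go func() { // simulates syncIngress checking isLeader()
		defer wg.Done()
		for i := 0; i < 100; i++ {
			_ = f.get()
		}
	}()

	wg.Wait()
	fmt.Println("leader:", f.get())
}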
123 changes: 123 additions & 0 deletions internal/ingress/controller/status.go
@@ -0,0 +1,123 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
"context"
"os"
"time"

"k8s.io/klog"

apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/leaderelection"
"k8s.io/client-go/tools/leaderelection/resourcelock"
"k8s.io/client-go/tools/record"
)

type leaderElectionConfig struct {
PodName string
PodNamespace string

Client clientset.Interface

ElectionID string

OnStartedLeading func(chan struct{})
OnStoppedLeading func()
}

func setupLeaderElection(config *leaderElectionConfig) {
var elector *leaderelection.LeaderElector

// start a new context
ctx := context.Background()

var cancelContext context.CancelFunc

var newLeaderCtx = func(ctx context.Context) context.CancelFunc {
// allow to cancel the context in case we stop being the leader
leaderCtx, cancel := context.WithCancel(ctx)
go elector.Run(leaderCtx)
return cancel
}

var stopCh chan struct{}
callbacks := leaderelection.LeaderCallbacks{
OnStartedLeading: func(ctx context.Context) {
klog.V(2).Infof("I am the new leader")
stopCh = make(chan struct{})

if config.OnStartedLeading != nil {
config.OnStartedLeading(stopCh)
}
},
OnStoppedLeading: func() {
klog.V(2).Info("I am not leader anymore")
close(stopCh)

// cancel the context
cancelContext()

cancelContext = newLeaderCtx(ctx)

if config.OnStoppedLeading != nil {
config.OnStoppedLeading()
}
},
OnNewLeader: func(identity string) {
klog.Infof("new leader elected: %v", identity)
},
}

broadcaster := record.NewBroadcaster()
hostname, _ := os.Hostname()

recorder := broadcaster.NewRecorder(scheme.Scheme, apiv1.EventSource{
Component: "ingress-leader-elector",
Host: hostname,
})

lock := resourcelock.ConfigMapLock{
ConfigMapMeta: metav1.ObjectMeta{Namespace: config.PodNamespace, Name: config.ElectionID},
Client: config.Client.CoreV1(),
LockConfig: resourcelock.ResourceLockConfig{
Identity: config.PodName,
EventRecorder: recorder,
},
}

ttl := 30 * time.Second
var err error

elector, err = leaderelection.NewLeaderElector(leaderelection.LeaderElectionConfig{
Lock: &lock,
LeaseDuration: ttl,
RenewDeadline: ttl / 2,
RetryPeriod: ttl / 4,

Callbacks: callbacks,
})
if err != nil {
klog.Fatalf("unexpected error starting leader election: %v", err)
}

cancelContext = newLeaderCtx(ctx)
}
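
For orientation, a self-contained sketch (not part of this commit) of the same client-go leader-election pattern that setupLeaderElection wires into the controller; the namespace, lock name, component name, and timings are illustrative:

package main

import (
	"context"
	"os"
	"time"

	"k8s.io/klog"

	apiv1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/leaderelection"
	"k8s.io/client-go/tools/leaderelection/resourcelock"
	"k8s.io/client-go/tools/record"
)

func main() {
	cfg, err := rest.InClusterConfig() // assumes the sketch runs inside a pod
	if err != nil {
		klog.Fatalf("unexpected error building cluster config: %v", err)
	}
	client := kubernetes.NewForConfigOrDie(cfg)

	hostname, _ := os.Hostname()
	recorder := record.NewBroadcaster().NewRecorder(scheme.Scheme, apiv1.EventSource{
		Component: "example-leader-elector", // hypothetical component name
		Host:      hostname,
	})

	lock := &resourcelock.ConfigMapLock{
		// hypothetical namespace and election ID
		ConfigMapMeta: metav1.ObjectMeta{Namespace: "default", Name: "example-election"},
		Client:        client.CoreV1(),
		LockConfig: resourcelock.ResourceLockConfig{
			Identity:      hostname,
			EventRecorder: recorder,
		},
	}

	elector, err := leaderelection.NewLeaderElector(leaderelection.LeaderElectionConfig{
		Lock:          lock,
		LeaseDuration: 30 * time.Second,
		RenewDeadline: 15 * time.Second,
		RetryPeriod:   7 * time.Second,
		Callbacks: leaderelection.LeaderCallbacks{
			OnStartedLeading: func(ctx context.Context) { klog.Info("started leading") },
			OnStoppedLeading: func() { klog.Info("stopped leading") },
			OnNewLeader:      func(id string) { klog.Infof("current leader: %v", id) },
		},
	})
	if err != nil {
		klog.Fatalf("unexpected error creating the leader elector: %v", err)
	}

	elector.Run(context.Background()) // blocks; returns once leadership is lost
}

Unlike this sketch, where Run simply returns after the lease is lost, the commit restarts the elector from OnStoppedLeading (via newLeaderCtx) so the pod immediately rejoins the election instead of exiting. Note also that newer client-go releases favor Lease-based resource locks over the ConfigMap lock used here.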
35 changes: 33 additions & 2 deletions internal/ingress/metric/collectors/controller.go
@@ -46,6 +46,8 @@ type Controller struct {

constLabels prometheus.Labels
labels prometheus.Labels

leaderElection *prometheus.GaugeVec
}

// NewController creates a new prometheus collector for the
@@ -112,6 +114,15 @@ func NewController(pod, namespace, class string) *Controller {
},
sslLabelHost,
),
leaderElection: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: PrometheusNamespace,
Name: "leader_election_status",
Help: "Gauge reporting status of the leader election, 0 indicates follower, 1 indicates leader. 'name' is the string used to identify the lease",
ConstLabels: constLabels,
},
[]string{"name"},
),
}

return cm
@@ -127,6 +138,16 @@ func (cm *Controller) IncReloadErrorCount() {
cm.reloadOperationErrors.With(cm.constLabels).Inc()
}

// OnStartedLeading indicates the pod was elected as the leader
func (cm *Controller) OnStartedLeading(electionID string) {
cm.leaderElection.WithLabelValues(electionID).Set(1.0)
}

// OnStoppedLeading indicates the pod stopped being the leader
func (cm *Controller) OnStoppedLeading(electionID string) {
cm.leaderElection.WithLabelValues(electionID).Set(0)
}

// ConfigSuccess set a boolean flag according to the output of the controller configuration reload
func (cm *Controller) ConfigSuccess(hash uint64, success bool) {
if success {
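
The leader_election_status gauge added above reduces OnStartedLeading and OnStoppedLeading to a Set on the labelled child. A small sketch (not part of this commit) of the observable effect, assuming PrometheusNamespace is "nginx_ingress_controller" and using a hypothetical election ID:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	leaderElection := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "nginx_ingress_controller", // assumed value of PrometheusNamespace
			Name:      "leader_election_status",
			Help:      "1 while this replica holds the lease, 0 otherwise.",
		},
		[]string{"name"},
	)

	electionID := "ingress-controller-leader-nginx" // hypothetical election ID

	leaderElection.WithLabelValues(electionID).Set(1) // what OnStartedLeading does
	fmt.Println(testutil.ToFloat64(leaderElection.WithLabelValues(electionID))) // 1

	leaderElection.WithLabelValues(electionID).Set(0) // what OnStoppedLeading does
	fmt.Println(testutil.ToFloat64(leaderElection.WithLabelValues(electionID))) // 0
}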
@@ -150,6 +171,7 @@ func (cm Controller) Describe(ch chan<- *prometheus.Desc) {
cm.reloadOperation.Describe(ch)
cm.reloadOperationErrors.Describe(ch)
cm.sslExpireTime.Describe(ch)
cm.leaderElection.Describe(ch)
}

// Collect implements the prometheus.Collector interface.
@@ -160,6 +182,7 @@ func (cm Controller) Collect(ch chan<- prometheus.Metric) {
cm.reloadOperation.Collect(ch)
cm.reloadOperationErrors.Collect(ch)
cm.sslExpireTime.Collect(ch)
cm.leaderElection.Collect(ch)
}

// SetSSLExpireTime sets the expiration time of SSL Certificates
@@ -179,13 +202,21 @@ func (cm *Controller) SetSSLExpireTime(servers []*ingress.Server) {

// RemoveMetrics removes metrics for hostnames not available anymore
func (cm *Controller) RemoveMetrics(hosts []string, registry prometheus.Gatherer) {
cm.removeSSLExpireMetrics(true, hosts, registry)
}

// RemoveAllSSLExpireMetrics removes metrics for expiration of SSL Certificates
func (cm *Controller) RemoveAllSSLExpireMetrics(registry prometheus.Gatherer) {
cm.removeSSLExpireMetrics(false, []string{}, registry)
}

func (cm *Controller) removeSSLExpireMetrics(onlyDefinedHosts bool, hosts []string, registry prometheus.Gatherer) {
mfs, err := registry.Gather()
if err != nil {
klog.Errorf("Error gathering metrics: %v", err)
return
}

klog.V(2).Infof("removing SSL certificate metrics for %v hosts", hosts)
toRemove := sets.NewString(hosts...)

for _, mf := range mfs {
@@ -208,7 +239,7 @@ func (cm *Controller) RemoveMetrics(hosts []string, registry prometheus.Gatherer
continue
}

if !toRemove.Has(host) {
if onlyDefinedHosts && !toRemove.Has(host) {
continue
}

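
The refactor above splits metric removal: RemoveMetrics keeps filtering by host, while the new RemoveAllSSLExpireMetrics (called from the collector when leadership is lost) wipes every SSL-expiry series. The commit walks the registry via Gather() so it can match series by label; a rough alternative sketch of the same idea, assuming a GaugeVec labelled solely by host:

package collectors

import "github.com/prometheus/client_golang/prometheus"

// Sketch only: assumes sslExpireTime is a GaugeVec labelled solely by "host";
// the real collector matches series through the registry instead.
func removeSSLExpireSeries(sslExpireTime *prometheus.GaugeVec, hosts []string, onlyDefinedHosts bool) {
	if !onlyDefinedHosts {
		sslExpireTime.Reset() // drop every SSL-expiry series this vector exports
		return
	}
	for _, host := range hosts {
		sslExpireTime.DeleteLabelValues(host) // drop only the hosts that went away
	}
}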
6 changes: 6 additions & 0 deletions internal/ingress/metric/dummy.go
@@ -52,3 +52,9 @@ func (dc DummyCollector) SetSSLExpireTime([]*ingress.Server) {}

// SetHosts ...
func (dc DummyCollector) SetHosts(hosts sets.String) {}

// OnStartedLeading indicates the pod was elected as the leader
func (dc DummyCollector) OnStartedLeading(electionID string) {}

// OnStoppedLeading indicates the pod stopped being the leader
func (dc DummyCollector) OnStoppedLeading(electionID string) {}
14 changes: 14 additions & 0 deletions internal/ingress/metric/main.go
@@ -36,6 +36,9 @@ type Collector interface {
IncReloadCount()
IncReloadErrorCount()

OnStartedLeading(string)
OnStoppedLeading(string)

RemoveMetrics(ingresses, endpoints []string)

SetSSLExpireTime([]*ingress.Server)
@@ -147,3 +150,14 @@ func (c *collector) SetSSLExpireTime(servers []*ingress.Server) {
func (c *collector) SetHosts(hosts sets.String) {
c.socket.SetHosts(hosts)
}

// OnStartedLeading indicates the pod was elected as the leader
func (c *collector) OnStartedLeading(electionID string) {
c.ingressController.OnStartedLeading(electionID)
}

// OnStoppedLeading indicates the pod stopped being the leader
func (c *collector) OnStoppedLeading(electionID string) {
c.ingressController.OnStoppedLeading(electionID)
c.ingressController.RemoveAllSSLExpireMetrics(c.registry)
}
