Skip to content

Commit

Permalink
feat: Detect Watcher certificate not renewed (kyma-project#1088)
Browse files Browse the repository at this point in the history
* Create docker-publish.yml

* change docker file context

* delete github action

* delete github action

* move metrics to internal pkg

* refactor PurgeMetrics struct

* refactor KymaMetrics struct

* refactor KymaMetrics struct

* add watcher metrics

* introduce SkrWebhookManagerConfig, CertificateConfig, GatewayConfig

* add e2e test

* change metrics to function

* change metrics to function

* change RemoveModuleStateMetrics signature

* introduce ModuleMetrics interface
add unit test for ModuleMetrics

* rename RunnerImpl to Runner

* fix merge conflict

* fix lint

* fix mistakenly deleted IsManagedKyma

* resolve review comment

* fix lint
  • Loading branch information
ruanxin authored Dec 5, 2023
1 parent 68bc783 commit d6940b1
Show file tree
Hide file tree
Showing 26 changed files with 640 additions and 385 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/feature-request.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: Feature Request
description: Suggest an improvement to the project
title: "Feature Title"
labels: ["kind/feature"]
labels: [ "kind/feature" ]
body:
- type: markdown
attributes:
Expand Down
26 changes: 24 additions & 2 deletions .github/workflows/test-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ jobs:
"purge-controller",
"purge-metrics",
"module-upgrade",
"ca-certificate-rotation"
"ca-certificate-rotation",
"self-signed-certificate-rotation"
]
name: "E2E"
needs: [wait-for-img]
Expand Down Expand Up @@ -133,6 +134,24 @@ jobs:
cat purge_finalizer.yaml
kustomize edit add patch --path purge_finalizer.yaml --kind Deployment
popd
- name: Patch self signed certificate lifetime
if: ${{matrix.e2e-test == 'self-signed-certificate-rotation'}}
working-directory: lifecycle-manager
run: |
pushd config/watcher_local_test
echo \
"- op: add
path: /spec/template/spec/containers/0/args/-
value: --self-signed-cert-duration=1h
- op: add
path: /spec/template/spec/containers/0/args/-
value: --self-signed-cert-renew-before=59m
- op: add
path: /spec/template/spec/containers/0/args/-
value: --self-signed-cert-renew-buffer=1m" >> self-signed-cert.yaml
cat self-signed-cert.yaml
kustomize edit add patch --path self-signed-cert.yaml --kind Deployment
popd
- name: Patch CA certificate renewBefore
if: ${{matrix.e2e-test == 'ca-certificate-rotation'}}
working-directory: lifecycle-manager
Expand Down Expand Up @@ -235,7 +254,10 @@ jobs:
kubectl apply -f template.yaml
- name: Expose Metrics Endpoint
working-directory: lifecycle-manager
if: ${{ matrix.e2e-test == 'kyma-metrics' || matrix.e2e-test == 'purge-metrics' }}
if: ${{ matrix.e2e-test == 'kyma-metrics' ||
matrix.e2e-test == 'purge-metrics' ||
matrix.e2e-test == 'self-signed-certificate-rotation'
}}
run: |
kubectl patch svc klm-metrics-service -p '{"spec": {"type": "LoadBalancer"}}' -n kcp-system
- name: Run ${{ matrix.e2e-test }}
Expand Down
72 changes: 43 additions & 29 deletions cmd/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,39 +4,42 @@ import (
"flag"
"time"

"github.com/kyma-project/lifecycle-manager/internal/controller"
"github.com/kyma-project/lifecycle-manager/pkg/log"
)

const (
defaultKymaRequeueSuccessInterval = 30 * time.Second
defaultKymaRequeueErrInterval = 2 * time.Second
defaultKymaRequeueWarningInterval = 30 * time.Second
defaultKymaRequeueBusyInterval = 5 * time.Second
defaultManifestRequeueSuccessInterval = 30 * time.Second
defaultWatcherRequeueSuccessInterval = 30 * time.Second
defaultClientQPS = 300
defaultClientBurst = 600
defaultPprofServerTimeout = 90 * time.Second
rateLimiterBurstDefault = 200
rateLimiterFrequencyDefault = 30
failureBaseDelayDefault = 100 * time.Millisecond
failureMaxDelayDefault = 5 * time.Second
defaultCacheSyncTimeout = 2 * time.Minute
defaultLogLevel = log.WarnLevel
defaultPurgeFinalizerTimeout = 5 * time.Minute
defaultMaxConcurrentManifestReconciles = 1
defaultMaxConcurrentKymaReconciles = 1
defaultMaxConcurrentWatcherReconciles = 1
defaultIstioGatewayName = "klm-watcher-gateway"
defaultIstioGatewayNamespace = "kcp-system"
defaultIstioNamespace = "istio-system"
defaultCaCertName = "klm-watcher-serving-cert"
defaultCaCertCacheTTL time.Duration = 1 * time.Hour
defaultKymaRequeueSuccessInterval = 30 * time.Second
defaultKymaRequeueErrInterval = 2 * time.Second
defaultKymaRequeueWarningInterval = 30 * time.Second
defaultKymaRequeueBusyInterval = 5 * time.Second
defaultManifestRequeueSuccessInterval = 30 * time.Second
defaultWatcherRequeueSuccessInterval = 30 * time.Second
defaultClientQPS = 300
defaultClientBurst = 600
defaultPprofServerTimeout = 90 * time.Second
rateLimiterBurstDefault = 200
rateLimiterFrequencyDefault = 30
failureBaseDelayDefault = 100 * time.Millisecond
failureMaxDelayDefault = 5 * time.Second
defaultCacheSyncTimeout = 2 * time.Minute
defaultLogLevel = log.WarnLevel
defaultPurgeFinalizerTimeout = 5 * time.Minute
defaultMaxConcurrentManifestReconciles = 1
defaultMaxConcurrentKymaReconciles = 1
defaultMaxConcurrentWatcherReconciles = 1
defaultIstioGatewayName = "klm-watcher-gateway"
defaultIstioGatewayNamespace = "kcp-system"
defaultIstioNamespace = "istio-system"
defaultCaCertName = "klm-watcher-serving-cert"
defaultCaCertCacheTTL time.Duration = 1 * time.Hour
defaultSelfSignedCertDuration time.Duration = 90 * 24 * time.Hour
defaultSelfSignedCertRenewBefore time.Duration = 60 * 24 * time.Hour
defaultSelfSignedCertificateRenewBuffer = 24 * time.Hour
DefaultRemoteSyncNamespace = "kyma-system"
)

//nolint:funlen
func defineFlagVar() *FlagVar {
func DefineFlagVar() *FlagVar {
flagVar := new(FlagVar)
flag.StringVar(&flagVar.metricsAddr, "metrics-bind-address", ":8080",
"The address the metric endpoint binds to.")
Expand Down Expand Up @@ -135,13 +138,21 @@ func defineFlagVar() *FlagVar {
"Indicates the SKR Purge Finalizers execution delay in seconds")
flag.StringVar(&flagVar.skipPurgingFor, "skip-finalizer-purging-for", "", "Exclude the passed CRDs"+
" from finalizer removal. Example: 'ingressroutetcps.traefik.containo.us,*.helm.cattle.io'.")
flag.StringVar(&flagVar.remoteSyncNamespace, "sync-namespace", controller.DefaultRemoteSyncNamespace,
flag.StringVar(&flagVar.remoteSyncNamespace, "sync-namespace", DefaultRemoteSyncNamespace,
"Name of the namespace for syncing remote Kyma and module catalog")
flag.StringVar(&flagVar.caCertName, "ca-cert-name", defaultCaCertName,
"Name of the CA Certificate in Istio Namespace which is used to sign SKR Certificates")
flag.DurationVar(&flagVar.caCertCacheTTL, "ca-cert-cache-ttl", defaultCaCertCacheTTL,
"The ttl for the CA Certificate Cache")
flag.BoolVar(&flagVar.isKymaManaged, "is-kyma-managed", false, "indicates whether Kyma is managed")
flag.DurationVar(&flagVar.SelfSignedCertDuration, "self-signed-cert-duration", defaultSelfSignedCertDuration,
"The lifetime duration of self-signed certificate, minimum accepted duration is 1 hour.")
flag.DurationVar(&flagVar.SelfSignedCertRenewBefore, "self-signed-cert-renew-before",
defaultSelfSignedCertRenewBefore,
"How long before the currently issued self-signed certificate's expiry cert-manager should renew the certificate")
flag.DurationVar(&flagVar.SelfSignedCertRenewBuffer, "self-signed-cert-renew-buffer",
defaultSelfSignedCertificateRenewBuffer,
"The buffer duration to wait before confirm self-signed certificate not renewed")
flag.BoolVar(&flagVar.IsKymaManaged, "is-kyma-managed", false, "indicates whether Kyma is managed")
return flagVar
}

Expand Down Expand Up @@ -192,5 +203,8 @@ type FlagVar struct {
caCertName string
caCertCacheTTL time.Duration
enableVerification bool
isKymaManaged bool
IsKymaManaged bool
SelfSignedCertDuration time.Duration
SelfSignedCertRenewBefore time.Duration
SelfSignedCertRenewBuffer time.Duration
}
73 changes: 36 additions & 37 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
"time"

certmanagerv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1"
watchermetrics "github.com/kyma-project/runtime-watcher/listener/pkg/metrics"
"go.uber.org/zap/zapcore"
"golang.org/x/time/rate"
istioclientapiv1beta1 "istio.io/client-go/pkg/apis/networking/v1beta1"
Expand All @@ -46,7 +45,6 @@ import (
ctrlruntime "sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/manager"
ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

"github.com/kyma-project/lifecycle-manager/api"
Expand Down Expand Up @@ -88,7 +86,7 @@ func init() {
}

func main() {
flagVar := defineFlagVar()
flagVar := DefineFlagVar()
flag.Parse()
ctrl.SetLogger(log.ConfigLogger(int8(flagVar.logLevel), zapcore.Lock(os.Stdout)))
if flagVar.pprof {
Expand Down Expand Up @@ -147,18 +145,28 @@ func setupManager(flagVar *FlagVar, newCacheOptions cache.Options, scheme *machi
os.Exit(1)
}
remoteClientCache := remote.NewClientCache()
var skrWebhookManager *watcher.SKRWebhookManifestManager
if flagVar.enableKcpWatcher {
watcherChartDirInfo, err := os.Stat(flagVar.skrWatcherPath)
if err != nil || !watcherChartDirInfo.IsDir() {
setupLog.Error(err, "failed to read local skr chart")
os.Exit(1)
}

if skrWebhookManager, err = createSkrWebhookManager(mgr, flagVar); err != nil {
setupLog.Error(err, "failed to create webhook chart manager")
os.Exit(1)
}
setupKcpWatcherReconciler(mgr, options, flagVar)
}

setupKymaReconciler(mgr, remoteClientCache, flagVar, options)
setupKymaReconciler(mgr, remoteClientCache, flagVar, options, skrWebhookManager)
setupManifestReconciler(mgr, flagVar, options)

if flagVar.enablePurgeFinalizer {
setupPurgeReconciler(mgr, remoteClientCache, flagVar, options)
}

if flagVar.enableKcpWatcher {
setupKcpWatcherReconciler(mgr, options, flagVar)
watchermetrics.Init(ctrlmetrics.Registry)
}
if flagVar.enableWebhooks {
enableWebhooks(mgr)
}
Expand Down Expand Up @@ -218,25 +226,11 @@ func controllerOptionsFromFlagVar(flagVar *FlagVar) ctrlruntime.Options {
}
}

func setupKymaReconciler(mgr ctrl.Manager,
remoteClientCache *remote.ClientCache,
flagVar *FlagVar, options ctrlruntime.Options,
func setupKymaReconciler(mgr ctrl.Manager, remoteClientCache *remote.ClientCache, flagVar *FlagVar,
options ctrlruntime.Options, skrWebhookManager *watcher.SKRWebhookManifestManager,
) {
options.MaxConcurrentReconciles = flagVar.maxConcurrentKymaReconciles
kcpRestConfig := mgr.GetConfig()
var skrWebhookManager watcher.SKRWebhookManager
if flagVar.enableKcpWatcher {
watcherChartDirInfo, err := os.Stat(flagVar.skrWatcherPath)
if err != nil || !watcherChartDirInfo.IsDir() {
setupLog.Error(err, "failed to read local skr chart")
os.Exit(1)
}

if skrWebhookManager, err = createSkrWebhookManager(mgr, flagVar); err != nil {
setupLog.Error(err, "failed to create webhook chart manager")
os.Exit(1)
}
}

if err := (&controller.KymaReconciler{
Client: mgr.GetClient(),
Expand All @@ -256,7 +250,7 @@ func setupKymaReconciler(mgr ctrl.Manager,
},
InKCPMode: flagVar.inKCPMode,
RemoteSyncNamespace: flagVar.remoteSyncNamespace,
IsManagedKyma: flagVar.isKymaManaged,
IsManagedKyma: flagVar.IsKymaManaged,
Metrics: metrics.NewKymaMetrics(),
}).SetupWithManager(
mgr, options, controller.SetupUpSetting{
Expand All @@ -271,20 +265,25 @@ func setupKymaReconciler(mgr ctrl.Manager,
}

func createSkrWebhookManager(mgr ctrl.Manager, flagVar *FlagVar) (*watcher.SKRWebhookManifestManager, error) {
caCertificateCache := watcher.NewCertificateCache(flagVar.caCertCacheTTL)
caCertificateCache := watcher.NewCACertificateCache(flagVar.caCertCacheTTL)
return watcher.NewSKRWebhookManifestManager(mgr.GetConfig(), mgr.GetScheme(), caCertificateCache,
&watcher.SkrWebhookManagerConfig{
SKRWatcherPath: flagVar.skrWatcherPath,
SkrWatcherImage: flagVar.skrWatcherImage,
SkrWebhookCPULimits: flagVar.skrWebhookCPULimits,
SkrWebhookMemoryLimits: flagVar.skrWebhookMemoryLimits,
LocalGatewayPortOverwrite: flagVar.listenerPortOverwrite,
IstioNamespace: flagVar.istioNamespace,
watcher.SkrWebhookManagerConfig{
SKRWatcherPath: flagVar.skrWatcherPath,
SkrWatcherImage: flagVar.skrWatcherImage,
SkrWebhookCPULimits: flagVar.skrWebhookCPULimits,
SkrWebhookMemoryLimits: flagVar.skrWebhookMemoryLimits,
RemoteSyncNamespace: flagVar.remoteSyncNamespace,
}, watcher.CertificateConfig{
IstioNamespace: flagVar.istioNamespace,
RemoteSyncNamespace: flagVar.remoteSyncNamespace,
CACertificateName: flagVar.caCertName,
AdditionalDNSNames: strings.Split(flagVar.additionalDNSNames, ","),
Duration: flagVar.SelfSignedCertDuration,
RenewBefore: flagVar.SelfSignedCertRenewBefore,
}, watcher.GatewayConfig{
IstioGatewayName: flagVar.istioGatewayName,
IstioGatewayNamespace: flagVar.istioGatewayNamespace,
RemoteSyncNamespace: flagVar.remoteSyncNamespace,
AdditionalDNSNames: strings.Split(flagVar.additionalDNSNames, ","),
CACertificateName: flagVar.caCertName,
LocalGatewayPortOverwrite: flagVar.listenerPortOverwrite,
})
}

Expand All @@ -304,7 +303,7 @@ func setupPurgeReconciler(mgr ctrl.Manager,
ResolveRemoteClient: resolveRemoteClientFunc,
PurgeFinalizerTimeout: flagVar.purgeFinalizerTimeout,
SkipCRDs: matcher.CreateCRDMatcherFrom(flagVar.skipPurgingFor),
IsManagedKyma: flagVar.isKymaManaged,
IsManagedKyma: flagVar.IsKymaManaged,
Metrics: metrics.NewPurgeMetrics(),
}).SetupWithManager(
mgr, options,
Expand Down
7 changes: 4 additions & 3 deletions internal/controller/kyma_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ type KymaReconciler struct {
record.EventRecorder
queue.RequeueIntervals
signature.VerificationSettings
SKRWebhookManager watcher.SKRWebhookManager
SKRWebhookManager *watcher.SKRWebhookManifestManager
KcpRestConfig *rest.Config
RemoteClientCache *remote.ClientCache
InKCPMode bool
Expand Down Expand Up @@ -119,7 +119,7 @@ func (r *KymaReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.

if !kyma.DeletionTimestamp.IsZero() && errors.Is(err, remote.ErrAccessSecretNotFound) {
logger.Info("access secret not found for kyma, assuming already deleted cluster")
r.Metrics.CleanupMetrics(kyma)
r.Metrics.CleanupMetrics(kyma.Name)
r.removeAllFinalizers(kyma)
return ctrl.Result{Requeue: true}, r.updateKyma(ctx, kyma)
}
Expand Down Expand Up @@ -395,6 +395,7 @@ func (r *KymaReconciler) handleDeletingState(ctx context.Context, kyma *v1beta2.
r.enqueueNormalEvent(kyma, webhookChartRemoval, err.Error())
return ctrl.Result{RequeueAfter: r.RequeueIntervals.Busy}, nil
}
r.SKRWebhookManager.WatcherMetrics.CleanupMetrics(kyma.Name)
}

if r.SyncKymaEnabled(kyma) {
Expand All @@ -414,7 +415,7 @@ func (r *KymaReconciler) handleDeletingState(ctx context.Context, kyma *v1beta2.
logger.Info("removed remote finalizer")
}

r.Metrics.CleanupMetrics(kyma)
r.Metrics.CleanupMetrics(kyma.Name)

controllerutil.RemoveFinalizer(kyma, v1beta2.Finalizer)
return ctrl.Result{Requeue: true}, r.updateKyma(ctx, kyma)
Expand Down
1 change: 0 additions & 1 deletion internal/controller/watcher_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ func (r *WatcherReconciler) handleDeletingState(ctx context.Context, watcherCR *
func (r *WatcherReconciler) handleProcessingState(ctx context.Context,
watcherCR *v1beta2.Watcher,
) (ctrl.Result, error) {
// Create virtualService in Memory
virtualSvc, err := r.IstioClient.NewVirtualService(ctx, watcherCR)
if err != nil {
return r.updateWatcherState(ctx, watcherCR, shared.StateError, err)
Expand Down
2 changes: 1 addition & 1 deletion internal/pkg/metrics/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
const (
shootIDLabel = "shoot"
instanceIDLabel = "instance_id"
kymaNameLabel = "kyma_name"
KymaNameLabel = "kyma_name"
)

var (
Expand Down
Loading

0 comments on commit d6940b1

Please sign in to comment.