Skip to content

🌱 Add support for CA/certificate rotation #1062

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 7 additions & 10 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"flag"
"fmt"
"net/http"
"os"
"path/filepath"

Expand Down Expand Up @@ -176,16 +177,16 @@ func main() {
os.Exit(1)
}

certPool, err := httputil.NewCertPool(caCertDir)
certPoolWatcher, err := httputil.NewCertPoolWatcher(caCertDir, ctrl.Log.WithName("cert-pool"))
if err != nil {
setupLog.Error(err, "unable to create CA certificate pool")
os.Exit(1)
}
unpacker := &source.ImageRegistry{
BaseCachePath: filepath.Join(cachePath, "unpack"),
// TODO: This needs to be derived per extension via ext.Spec.InstallNamespace
AuthNamespace: systemNamespace,
CaCertPool: certPool,
AuthNamespace: systemNamespace,
CertPoolWatcher: certPoolWatcher,
}

clusterExtensionFinalizers := crfinalizer.NewFinalizers()
Expand All @@ -210,18 +211,15 @@ func main() {
}

cl := mgr.GetClient()
httpClient, err := httputil.BuildHTTPClient(certPool)
if err != nil {
setupLog.Error(err, "unable to create catalogd http client")
os.Exit(1)
}

catalogsCachePath := filepath.Join(cachePath, "catalogs")
if err := os.MkdirAll(catalogsCachePath, 0700); err != nil {
setupLog.Error(err, "unable to create catalogs cache directory")
os.Exit(1)
}
catalogClient := catalogclient.New(cache.NewFilesystemCache(catalogsCachePath, httpClient))
catalogClient := catalogclient.New(cache.NewFilesystemCache(catalogsCachePath, func() (*http.Client, error) {
return httputil.BuildHTTPClient(certPoolWatcher)
}))

resolver := &resolve.CatalogResolver{
WalkCatalogsFunc: resolve.CatalogWalker(
Expand All @@ -243,7 +241,6 @@ func main() {
Unpacker: unpacker,
InstalledBundleGetter: &controllers.DefaultInstalledBundleGetter{ActionClientGetter: acg},
Finalizers: clusterExtensionFinalizers,
CaCertPool: certPool,
Preflights: preflights,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "ClusterExtension")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
value: {"name":"olmv1-certificate", "secret":{"secretName":"olmv1-cert", "optional": false, "items": [{"key": "ca.crt", "path": "olm-ca.crt"}]}}
- op: add
path: /spec/template/spec/containers/0/volumeMounts/-
value: {"name":"olmv1-certificate", "readOnly": true, "mountPath":"/var/certs/olm-ca.crt", "subPath":"olm-ca.crt"}
value: {"name":"olmv1-certificate", "readOnly": true, "mountPath":"/var/certs/"}
- op: add
path: /spec/template/spec/containers/0/args/-
value: "--ca-certs-dir=/var/certs"
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/Masterminds/semver/v3 v3.2.1
github.com/blang/semver/v4 v4.0.0
github.com/containerd/containerd v1.7.20
github.com/fsnotify/fsnotify v1.7.0
github.com/go-logr/logr v1.4.2
github.com/google/go-cmp v0.6.0
github.com/google/go-containerregistry v0.20.1
Expand Down Expand Up @@ -112,7 +113,6 @@ require (
github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect
github.com/fatih/color v1.15.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-errors/errors v1.4.2 // indirect
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect
github.com/go-git/go-billy/v5 v5.5.0 // indirect
Expand Down
12 changes: 8 additions & 4 deletions internal/catalogmetadata/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ var _ client.Fetcher = &filesystemCache{}
// - IF cached it will verify the cache is up to date. If it is up to date it will return
// the cached contents, if not it will fetch the new contents from the catalogd HTTP
// server and update the cached contents.
func NewFilesystemCache(cachePath string, client *http.Client) client.Fetcher {
func NewFilesystemCache(cachePath string, clientFunc func() (*http.Client, error)) client.Fetcher {
return &filesystemCache{
cachePath: cachePath,
mutex: sync.RWMutex{},
client: client,
getClient: clientFunc,
cacheDataByCatalogName: map[string]cacheData{},
}
}
Expand All @@ -50,7 +50,7 @@ type cacheData struct {
type filesystemCache struct {
mutex sync.RWMutex
cachePath string
client *http.Client
getClient func() (*http.Client, error)
cacheDataByCatalogName map[string]cacheData
}

Expand Down Expand Up @@ -95,7 +95,11 @@ func (fsc *filesystemCache) FetchCatalogContents(ctx context.Context, catalog *c
return nil, fmt.Errorf("error forming request: %v", err)
}

resp, err := fsc.client.Do(req)
client, err := fsc.getClient()
if err != nil {
return nil, fmt.Errorf("error getting HTTP client: %w", err)
}
resp, err := client.Do(req)
if err != nil {
return nil, fmt.Errorf("error performing request: %v", err)
}
Expand Down
4 changes: 3 additions & 1 deletion internal/catalogmetadata/cache/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,9 @@ func TestFilesystemCache(t *testing.T) {
maps.Copy(tt.tripper.content, tt.contents)
httpClient := http.DefaultClient
httpClient.Transport = tt.tripper
c := cache.NewFilesystemCache(cacheDir, httpClient)
c := cache.NewFilesystemCache(cacheDir, func() (*http.Client, error) {
return httpClient, nil
})

actualFS, err := c.FetchCatalogContents(ctx, tt.catalog)
if !tt.wantErr {
Expand Down
2 changes: 0 additions & 2 deletions internal/controllers/clusterextension_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package controllers
import (
"bytes"
"context"
"crypto/x509"
"errors"
"fmt"
"io"
Expand Down Expand Up @@ -89,7 +88,6 @@ type ClusterExtensionReconciler struct {
cache cache.Cache
InstalledBundleGetter InstalledBundleGetter
Finalizers crfinalizer.Finalizers
CaCertPool *x509.CertPool
Preflights []Preflight
}

Expand Down
108 changes: 108 additions & 0 deletions internal/httputil/certpoolwatcher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package httputil

import (
"crypto/x509"
"fmt"
"os"
"sync"
"time"

"github.com/fsnotify/fsnotify"
"github.com/go-logr/logr"
)

type CertPoolWatcher struct {
generation int
dir string
mx sync.RWMutex
pool *x509.CertPool
log logr.Logger
watcher *fsnotify.Watcher
done chan bool
}

// Returns the current CertPool and the generation number
func (cpw *CertPoolWatcher) Get() (*x509.CertPool, int, error) {
cpw.mx.RLock()
defer cpw.mx.RUnlock()
if cpw.pool == nil {
return nil, 0, fmt.Errorf("no certificate pool available")
}
return cpw.pool.Clone(), cpw.generation, nil
}

func (cpw *CertPoolWatcher) Done() {
cpw.done <- true
}

func NewCertPoolWatcher(caDir string, log logr.Logger) (*CertPoolWatcher, error) {
pool, err := NewCertPool(caDir, log)
if err != nil {
return nil, err
}
watcher, err := fsnotify.NewWatcher()
if err != nil {
return nil, err
}
if err = watcher.Add(caDir); err != nil {
return nil, err
}

cpw := &CertPoolWatcher{
generation: 1,
dir: caDir,
pool: pool,
log: log,
watcher: watcher,
done: make(chan bool),
}
go func() {
for {
select {
case <-watcher.Events:
cpw.drainEvents()
Copy link
Contributor

@everettraven everettraven Jul 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know if any events include only the directory being watched as the event.Name value? If so, I wonder if instead of performing this drainEvents action we could do some event filtering similar to https://github.com/fsnotify/fsnotify/blob/c1467c02fba575afdb5f4201072ab8403bbf00f4/cmd/fsnotify/file.go#L66-L78

I won't block the PR merging on this, but something that could make it so we don't have any "sleep" actions if it is possible

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some filtering might be useful. The only time this path should be updated is when a Secret is updated. The directory is read-only within the pod.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: the filter will not work if new files are added, as they will be filtered out. We need to recognize new files, deleted files, updated files, etc.

Copy link
Contributor

@everettraven everettraven Jul 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to recognize new files, deleted files, updated files, etc.

If we only react to "directory has been updated" type events wouldn't we catch these events as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But it wouldn't catch updates to files within, as that's a change to the file, not the directory.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought the reasoning for the drain events operation was because when we receive updates we get mass events on everything when something changed. Maybe I misunderstood, which led me to thinking that if any change happened in the directory (including an individual file), it would trigger an event for the directory as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Watch is on the directory, and that includes the contents. It also depends on how things are mounted. A change to a file within a directory does not necessarily indicate a change to the directory.
The drain is there because the update of a single secret may trigger a number of events (I was seeing 4+, because of how the mounted files were presented), and only one reload of the certs is necessary.
Based on my testing, if there's an update on a file, it only reports the update on that file (i.e. create/update); it doesn't trigger a second update on the directory as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added debug output to the cert watcher test for every event in the unit test:

=== RUN   TestCertPoolWatcher
    certpoolwatcher_test.go:72: Create cert file at "/tmp/cert-pool4285276756/test1.pem"
    certpoolwatcher_test.go:87: Create cert file at "/tmp/cert-pool4285276756/test2.pem"
Event: CREATE        "/tmp/cert-pool4285276756/test2.pem"
Event: WRITE         "/tmp/cert-pool4285276756/test2.pem"
--- PASS: TestCertPoolWatcher (1.11s)

So, there's an event for the create of the new PEM, and one for the write, but nothing on the directory itself. There are two events, and that would cause two reloads without the drain mechanism in place.

cpw.update()
case err := <-watcher.Errors:
log.Error(err, "error watching certificate dir")
os.Exit(1)
case <-cpw.done:
err := watcher.Close()
if err != nil {
log.Error(err, "error closing watcher")
}
return
}
}
}()
return cpw, nil
}

func (cpw *CertPoolWatcher) update() {
cpw.log.Info("updating certificate pool")
pool, err := NewCertPool(cpw.dir, cpw.log)
if err != nil {
cpw.log.Error(err, "error updating certificate pool")
os.Exit(1)
}
cpw.mx.Lock()
defer cpw.mx.Unlock()
cpw.pool = pool
cpw.generation++
}

// Drain as many events as possible before doing anything
// Otherwise, we will be hit with an event for _every_ entry in the
// directory, and end up doing an update for each one
func (cpw *CertPoolWatcher) drainEvents() {
for {
drainTimer := time.NewTimer(time.Millisecond * 50)
select {
case <-drainTimer.C:
return
case <-cpw.watcher.Events:
}
if !drainTimer.Stop() {
<-drainTimer.C
}
}
}
97 changes: 97 additions & 0 deletions internal/httputil/certpoolwatcher_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package httputil_test

import (
"context"
"crypto/ecdsa"
"crypto/elliptic"
"crypto/rand"
"crypto/x509"
"crypto/x509/pkix"
"encoding/pem"
"math/big"
"os"
"path/filepath"
"testing"
"time"

"github.com/stretchr/testify/require"
"sigs.k8s.io/controller-runtime/pkg/log"

"github.com/operator-framework/operator-controller/internal/httputil"
)

func createCert(t *testing.T, name string) {
priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
require.NoError(t, err)

notBefore := time.Now()
notAfter := notBefore.Add(time.Hour)
serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 128)
serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
require.NoError(t, err)

template := x509.Certificate{
SerialNumber: serialNumber,
Subject: pkix.Name{
Organization: []string{name},
},
NotBefore: notBefore,
NotAfter: notAfter,

IsCA: true,

KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign,
ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},

BasicConstraintsValid: true,
}

derBytes, err := x509.CreateCertificate(rand.Reader, &template, &template, &priv.PublicKey, priv)
require.NoError(t, err)

certOut, err := os.Create(name)
require.NoError(t, err)

err = pem.Encode(certOut, &pem.Block{Type: "CERTIFICATE", Bytes: derBytes})
require.NoError(t, err)

err = certOut.Close()
require.NoError(t, err)

// ignore the key
}

func TestCertPoolWatcher(t *testing.T) {
// create a temporary directory
tmpDir, err := os.MkdirTemp("", "cert-pool")
require.NoError(t, err)
defer os.RemoveAll(tmpDir)

// create the first cert
certName := filepath.Join(tmpDir, "test1.pem")
t.Logf("Create cert file at %q\n", certName)
createCert(t, certName)

// Create the cert pool watcher
cpw, err := httputil.NewCertPoolWatcher(tmpDir, log.FromContext(context.Background()))
require.NoError(t, err)
defer cpw.Done()

// Get the original pool
firstPool, firstGen, err := cpw.Get()
require.NoError(t, err)
require.NotNil(t, firstPool)

// Create a second cert
certName = filepath.Join(tmpDir, "test2.pem")
t.Logf("Create cert file at %q\n", certName)
createCert(t, certName)

require.Eventually(t, func() bool {
secondPool, secondGen, err := cpw.Get()
if err != nil {
return false
}
return secondGen != firstGen && !firstPool.Equal(secondPool)
}, 30*time.Second, time.Second)
}
Loading