From 033d8d60945849bddd5019e85d110683f4edcc2d Mon Sep 17 00:00:00 2001 From: James Ribe Date: Sun, 4 Aug 2024 18:01:24 -0700 Subject: [PATCH] add SVID check to agent health check (#5298) Signed-off-by: James Ribe --- pkg/agent/agent.go | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index 0d11ca29e0..433bad4417 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -44,7 +44,8 @@ const ( ) type Agent struct { - c *Config + c *Config + sto storage.Storage } // Run the agent @@ -56,7 +57,8 @@ func (a *Agent) Run(ctx context.Context) error { return err } - sto, err := storage.Open(a.c.DataDir) + var err error + a.sto, err = storage.Open(a.c.DataDir) if err != nil { return fmt.Errorf("failed to open storage: %w", err) } @@ -109,7 +111,7 @@ func (a *Agent) Run(ctx context.Context) error { ) for { - as, err = a.attest(ctx, sto, cat, metrics, nodeAttestor) + as, err = a.attest(ctx, a.sto, cat, metrics, nodeAttestor) if err == nil { break } @@ -136,7 +138,7 @@ func (a *Agent) Run(ctx context.Context) error { } } } else { - as, err = a.attest(ctx, sto, cat, metrics, nodeAttestor) + as, err = a.attest(ctx, a.sto, cat, metrics, nodeAttestor) if err != nil { return err } @@ -144,7 +146,7 @@ func (a *Agent) Run(ctx context.Context) error { svidStoreCache := a.newSVIDStoreCache() - manager, err := a.newManager(ctx, sto, cat, metrics, as, svidStoreCache, nodeAttestor) + manager, err := a.newManager(ctx, a.sto, cat, metrics, as, svidStoreCache, nodeAttestor) if err != nil { return err } @@ -389,11 +391,14 @@ func (a *Agent) waitForTestDial(ctx context.Context) error { // CheckHealth is used as a top-level health check for the agent. func (a *Agent) CheckHealth() health.State { - err := a.checkWorkloadAPI() - - // Both liveness and readiness checks are done by - // agents ability to create new Workload API client - // for the X509SVID service. + err := errors.Join( + a.checkWorkloadAPI(), + a.checkSVID(), + ) + + // Both liveness and readiness checks verify that: + // - the workload API endpoint is available + // - the agent has an SVID // TODO: Better live check for agent. return health.State{ Ready: err == nil, @@ -407,6 +412,20 @@ func (a *Agent) CheckHealth() health.State { } } +func (a *Agent) checkSVID() error { + if a.sto == nil { + return errors.New("storage not initialized") + } + svid, _, err := a.sto.LoadSVID() + if err != nil { + return fmt.Errorf("loading SVID: %w", err) + } + if svid == nil { + return errors.New("SVID is nil") + } + return nil +} + func (a *Agent) checkWorkloadAPI() error { clientOption, err := util.GetWorkloadAPIClientOption(a.c.BindAddress) if err != nil {