Skip to content

Commit

Permalink
🌟 enhancement(VSecM Sentinel): refactored forever loops (#946)
Browse files Browse the repository at this point in the history
updated infinite loops with early crashes instead

Signed-off-by: Volkan Özçelik <ovolkan@vmware.com>
  • Loading branch information
v0lkan authored Apr 26, 2024
1 parent 77cbb17 commit 3ec6188
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 167 deletions.
107 changes: 49 additions & 58 deletions app/sentinel/background/initialization/connectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,94 +13,85 @@ package initialization
import (
"context"
"github.com/pkg/errors"
"github.com/spiffe/go-spiffe/v2/workloadapi"
"time"

"github.com/vmware-tanzu/secrets-manager/app/sentinel/internal/safe"
"github.com/vmware-tanzu/secrets-manager/core/backoff"
"github.com/vmware-tanzu/secrets-manager/core/env"
log "github.com/vmware-tanzu/secrets-manager/core/log/std"
"github.com/vmware-tanzu/secrets-manager/core/spiffe"
)

func ensureApiConnectivity(ctx context.Context, cid *string) {
terminateAsap := env.TerminateSentinelOnInitCommandConnectivityFailure()

log.TraceLn(cid, "Before checking api connectivity")

for {
s := backoffStrategy()

err := backoff.Retry("RunInitCommands:CheckConnectivity", func() error {
log.TraceLn(cid, "RunInitCommands:CheckConnectivity: checking connectivity to safe")
s := backoffStrategy()
err := backoff.Retry("RunInitCommands:CheckConnectivity", func() error {
log.TraceLn(cid, "RunInitCommands:CheckConnectivity: checking connectivity to safe")

src, acquired := spiffe.AcquireSourceForSentinel(ctx)
if !acquired {
log.TraceLn(cid, "RunInitCommands:CheckConnectivity: failed to acquire source.")
if terminateAsap {
panic("RunInitCommands:CheckConnectivity: failed to acquire source")
}
src, acquired := spiffe.AcquireSourceForSentinel(ctx)
if !acquired {
log.TraceLn(cid, "RunInitCommands:CheckConnectivity: failed to acquire source.")

return errors.New("RunInitCommands:CheckConnectivity: failed to acquire source")
}
return errors.New("RunInitCommands:CheckConnectivity: failed to acquire source")
}

log.TraceLn(cid, "RunInitCommands:CheckConnectivity: acquired source successfully")
log.TraceLn(cid, "RunInitCommands:CheckConnectivity: acquired source successfully")

if err := safe.Check(ctx, src); err != nil {
log.TraceLn(cid, "RunInitCommands:CheckConnectivity: failed to verify connection to safe:", err.Error())
if terminateAsap {
panic("RunInitCommands:CheckConnectivity: failed to verify connection to safe")
}
if err := safe.Check(ctx, src); err != nil {
log.TraceLn(cid, "RunInitCommands:CheckConnectivity: failed to verify connection to safe:", err.Error())

return errors.Wrap(err, "RunInitCommands:CheckConnectivity: cannot establish connection to safe 001")
}
return errors.Wrap(err, "RunInitCommands:CheckConnectivity: cannot establish connection to safe 001")
}

log.TraceLn(cid, "RunInitCommands:CheckConnectivity: success")
return nil
}, s)
log.TraceLn(cid, "RunInitCommands:CheckConnectivity: success")
return nil
}, s)

if err == nil {
log.TraceLn(cid, "exiting backoffs")
break
}
if err == nil {
log.TraceLn(cid, "exiting backoffs")
return
}

// I shouldn't be here.
panic("RunInitCommands:CheckConnectivity: failed to verify connection to safe")
}

func ensureSourceAcquisition(ctx context.Context, cid *string) {
func ensureSourceAcquisition(ctx context.Context, cid *string) *workloadapi.X509Source {
// If `true`, instead of retrying with a backoff, kill the pod, and let the
// deployment controller restart it to initiate a new retry.
terminateAsap := env.TerminateSentinelOnInitCommandConnectivityFailure()

waitInterval := env.InitCommandRunnerWaitIntervalForSentinel()
time.Sleep(waitInterval)
log.TraceLn(cid, "RunInitCommands: acquiring source 001")

for {
log.TraceLn(cid, "RunInitCommands: acquiring source 001")
s := backoff.Strategy{
MaxRetries: 20,
Delay: 1000,
Exponential: true,
MaxDuration: 30 * time.Second,
}

s := backoff.Strategy{
MaxRetries: 20,
Delay: 1000,
Exponential: true,
MaxDuration: 30 * time.Second,
}
var src *workloadapi.X509Source

err := backoff.Retry("RunInitCommands:AcquireSource", func() error {
log.TraceLn(cid, "RunInitCommands:AcquireSource: acquireSourceForSentinel: 000")
_, acquired := spiffe.AcquireSourceForSentinel(ctx)
if !acquired {
log.TraceLn(cid, "RunInitCommands:AcquireSource: failed to acquire source.")
if terminateAsap {
panic("RunInitCommands:AcquireSource: failed to acquire source")
}
err := backoff.Retry("RunInitCommands:AcquireSource", func() error {
log.TraceLn(cid, "RunInitCommands:AcquireSource: acquireSourceForSentinel: 000")

return errors.New("RunInitCommands:AcquireSource: failed to acquire source 000")
}
acq, acquired := spiffe.AcquireSourceForSentinel(ctx)
src = acq

return nil
}, s)
if !acquired {
log.TraceLn(cid, "RunInitCommands:AcquireSource: failed to acquire source.")

if err == nil {
log.TraceLn(cid, "RunInitCommands:AcquireSource: got source. breaking.")
break
return errors.New("RunInitCommands:AcquireSource: failed to acquire source 000")
}

return nil
}, s)

if err == nil {
log.TraceLn(cid, "RunInitCommands:AcquireSource: got source. breaking.")
return src
}

// I shouldn't be here.
panic("RunInitCommands:AcquireSource: failed to acquire source")
}
26 changes: 10 additions & 16 deletions app/sentinel/background/initialization/io.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,9 @@ func parseCommandsFile(ctx context.Context, cid *string, scanner *bufio.Scanner)
log.TraceLn(cid, "Before parsing commands 002")

sc := entity.SentinelCommand{}
terminateAsap := env.TerminateSentinelOnInitCommandConnectivityFailure()

if scanner == nil {
if terminateAsap {
log.ErrorLn(cid, "RunInitCommands: error scanning commands file")
panic("RunInitCommands: error scanning commands file")
}

return
panic("RunInitCommands: error scanning commands file")
}

log.TraceLn(cid, "beginning scan")
Expand Down Expand Up @@ -97,10 +91,8 @@ dance:
"RunInitCommands:ProcessCommandBlock:error:",
err.Error(),
)
if terminateAsap {
panic("RunInitCommands:ProcessCommandBlock failed")
}
}

return err
}, s)

Expand All @@ -110,9 +102,10 @@ dance:
"RunInitCommands: error processing command block: ",
err.Error(),
)
if terminateAsap {
panic("RunInitCommands: error processing command block")
}

// If command failed, then the initialization is not totally successful.
// Thus, it is best to crash the container to restart the initialization.
panic("RunInitCommands:ProcessCommandBlock failed")
}

log.TraceLn(cid, "scanner: after delimiter")
Expand Down Expand Up @@ -181,8 +174,9 @@ dance:
"RunInitCommands: Error reading initialization file: ",
err.Error(),
)
if terminateAsap {
panic("RunInitCommands: Error reading initialization file")
}

// If command failed, then the initialization is not totally successful.
// Thus, it is best to crash the container to restart the initialization.
panic("RunInitCommands: Error in scanning the file")
}
}
20 changes: 4 additions & 16 deletions app/sentinel/background/initialization/keystone.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,10 @@ import (

"github.com/vmware-tanzu/secrets-manager/core/backoff"
entity "github.com/vmware-tanzu/secrets-manager/core/entity/data/v1"
"github.com/vmware-tanzu/secrets-manager/core/env"
log "github.com/vmware-tanzu/secrets-manager/core/log/std"
)

func markKeystone(ctx context.Context, cid *string) bool {
terminateAsap := env.TerminateSentinelOnInitCommandConnectivityFailure()

s := backoffStrategy()
err := backoff.Retry("RunInitCommands:MarkKeystone", func() error {
log.TraceLn(cid, "RunInitCommands:MarkKeystone: retrying with exponential backoff")
Expand All @@ -33,22 +30,13 @@ func markKeystone(ctx context.Context, cid *string) bool {
Secret: "keystone-init",
})

if err != nil {
if terminateAsap {
panic("RunInitCommands: error setting keystone secret")
}
}

return err
}, s)

if err != nil {
log.ErrorLn(cid, "RunInitCommands: error setting keystone secret: ", err.Error())
if terminateAsap {
panic("RunInitCommands: error setting keystone secret")
}
return false
if err == nil {
return true
}

return true
log.ErrorLn(cid, "RunInitCommands: error setting keystone secret: ", err.Error())
panic("RunInitCommands: error setting keystone secret")
}
24 changes: 14 additions & 10 deletions app/sentinel/background/initialization/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ package initialization

import (
"context"
"github.com/spiffe/go-spiffe/v2/workloadapi"
"os"
"time"

Expand Down Expand Up @@ -47,9 +46,19 @@ import (
// If the file cannot be opened, the function logs an informational message and
// returns early. Errors encountered while reading the file or closing it are
// logged as errors.
func RunInitCommands(ctx context.Context, source *workloadapi.X509Source) {
func RunInitCommands(ctx context.Context) {
cid := ctx.Value("correlationId").(*string)

waitInterval := env.InitCommandRunnerWaitBeforeExecIntervalForSentinel()
time.Sleep(waitInterval)

// Ensure that we can acquire a source before proceeding.
source := ensureSourceAcquisition(ctx, cid)

// Now, we are sure that we can acquire a source.
// Try to do a VSecM Safe API request with the source.
ensureApiConnectivity(ctx, cid)

// No need to proceed if initialization has been completed already.
if initCommandsExecutedAlready(ctx, source) {
log.TraceLn(cid, "RunInitCommands: executed already. exiting")
Expand All @@ -58,12 +67,6 @@ func RunInitCommands(ctx context.Context, source *workloadapi.X509Source) {

log.TraceLn(cid, "RunInitCommands: starting the init flow")

// Ensure that we can acquire a source before proceeding.
ensureSourceAcquisition(ctx, cid)
// Now, we are sure that we can acquire a source.
// Try to do a VSecM Safe API request with the source.
ensureApiConnectivity(ctx, cid)

// Now we know that we can establish a connection to VSecM Safe
// and execute API requests. So, we can safely run init commands.

Expand Down Expand Up @@ -100,13 +103,14 @@ func RunInitCommands(ctx context.Context, source *workloadapi.X509Source) {
if !success {
log.TraceLn(cid, "RunInitCommands: failed to mark keystone. exiting")

// If we cannot set the keystone secret, we should not proceed.
// If we cannot set the keystone secret, better to retry everything.
panic("RunInitCommands: failed to set keystone secret")
return
}

// Wait before notifying Keystone. This way, if there are things that
// take time to reconcile, they have a chance to do so.
waitInterval := env.InitCommandRunnerWaitIntervalBeforeInitComplete()
waitInterval = env.InitCommandRunnerWaitIntervalBeforeInitComplete()
time.Sleep(waitInterval)

// Everything is set up.
Expand Down
26 changes: 13 additions & 13 deletions app/sentinel/background/initialization/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,24 @@ func initCommandsExecutedAlready(ctx context.Context, src *workloadapi.X509Sourc

log.TraceLn(cid, "check:initCommandsExecutedAlready")

s := backoffStrategy()
initialized := false
for {
err := backoff.Retry("RunInitCommands:CheckConnectivity", func() error {
i, err := safe.CheckInitialization(ctx, src)
if err != nil {
return err
}
initialized = i
return nil
}, s)

s := backoffStrategy()
err := backoff.Retry("RunInitCommands:CheckConnectivity", func() error {
i, err := safe.CheckInitialization(ctx, src)
if err != nil {
log.ErrorLn(cid, "check:backoff:error", err.Error())
continue
return err
}

log.TraceLn(cid, "check:return initialized:", initialized)
initialized = i

return nil
}, s)

if err == nil {
return initialized
}

// I shouldn't be here.
panic("RunInitCommands:initCommandsExecutedAlready: failed to check command initialization")
}
Loading

0 comments on commit 3ec6188

Please sign in to comment.