From 67c40258d88c27c3a472689c4acd3b5295348838 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Volkan=20O=CC=88zc=CC=A7elik?= <ovolkan@vmware.com>
Date: Sat, 6 Apr 2024 00:04:45 -0700
Subject: [PATCH 1/3] =?UTF-8?q?=F0=9F=94=A8=20refactor(VSecM=20Sentinel):?=
 =?UTF-8?q?=20refactor=20RunInitCommand?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split up the file into smaller logical modules for better readability
and management.

Signed-off-by: Volkan Özçelik <ovolkan@vmware.com>
---
 app/keystone/cmd/main.go                      |  11 +-
 .../background/initialization/backoff.go      |  26 ++
 .../background/initialization/backoff_test.go |  11 +
 .../background/initialization/connectivity.go | 106 ++++++
 .../initialization/connectivity_test.go       |  11 +
 app/sentinel/background/initialization/io.go  | 178 ++++++++++
 .../background/initialization/io_test.go      |  11 +
 .../background/initialization/keystone.go     |  54 +++
 .../initialization/keystone_test.go           |  11 +
 app/sentinel/background/initialization/run.go | 316 ++----------------
 .../background/initialization/validation.go   |  56 ++++
 .../initialization/validation_test.go         |  11 +
 app/sentinel/internal/safe/post.go            |   6 +-
 docs/_pages/0110-configuration.md             |   9 -
 14 files changed, 506 insertions(+), 311 deletions(-)
 create mode 100644 app/sentinel/background/initialization/backoff.go
 create mode 100644 app/sentinel/background/initialization/backoff_test.go
 create mode 100644 app/sentinel/background/initialization/connectivity.go
 create mode 100644 app/sentinel/background/initialization/connectivity_test.go
 create mode 100644 app/sentinel/background/initialization/io.go
 create mode 100644 app/sentinel/background/initialization/io_test.go
 create mode 100644 app/sentinel/background/initialization/keystone.go
 create mode 100644 app/sentinel/background/initialization/keystone_test.go
 create mode 100644 app/sentinel/background/initialization/validation.go
 create mode 100644 app/sentinel/background/initialization/validation_test.go

diff --git a/app/keystone/cmd/main.go b/app/keystone/cmd/main.go
index 26742946..ef562e1b 100644
--- a/app/keystone/cmd/main.go
+++ b/app/keystone/cmd/main.go
@@ -11,12 +11,19 @@
 package main
 
 import (
+	"fmt"
+	"log"
+	"os"
+
 	"github.com/vmware-tanzu/secrets-manager/core/system"
 )
 
 func main() {
-	// TODO: add some logs here!
-	
+	log.Println(
+		"VSecM Keystone",
+		fmt.Sprint("v%s", os.Getenv("APP_VERSION")),
+	)
+
 	// Run on the main thread to wait forever.
 	system.KeepAlive()
 }
diff --git a/app/sentinel/background/initialization/backoff.go b/app/sentinel/background/initialization/backoff.go
new file mode 100644
index 00000000..c15ec996
--- /dev/null
+++ b/app/sentinel/background/initialization/backoff.go
@@ -0,0 +1,26 @@
+/*
+|    Protect your secrets, protect your sensitive data.
+:    Explore VMware Secrets Manager docs at https://vsecm.com/
+</
+<>/  keep your secrets... secret
+>/
+<>/' Copyright 2023-present VMware Secrets Manager contributors.
+>/'  SPDX-License-Identifier: BSD-2-Clause
+*/
+
+package initialization
+
+import (
+	"github.com/vmware-tanzu/secrets-manager/core/backoff"
+	"time"
+)
+
+// backoffStrategy returns the retry policy shared by the init steps. TODO: get some of these from env vars.
+func backoffStrategy() backoff.Strategy {
+	return backoff.Strategy{
+		MaxRetries:  20,
+		Delay:       1000, // NOTE(review): unit is not visible here, presumably milliseconds; confirm
+		Exponential: true,
+		MaxDuration: 30 * time.Second,
+	}
+}
diff --git a/app/sentinel/background/initialization/backoff_test.go b/app/sentinel/background/initialization/backoff_test.go
new file mode 100644
index 00000000..5c81ee09
--- /dev/null
+++ b/app/sentinel/background/initialization/backoff_test.go
@@ -0,0 +1,11 @@
+/*
+|    Protect your secrets, protect your sensitive data.
+:    Explore VMware Secrets Manager docs at https://vsecm.com/
+</
+<>/  keep your secrets... secret
+>/
+<>/' Copyright 2023-present VMware Secrets Manager contributors.
+>/'  SPDX-License-Identifier: BSD-2-Clause
+*/
+
+package initialization
diff --git a/app/sentinel/background/initialization/connectivity.go b/app/sentinel/background/initialization/connectivity.go
new file mode 100644
index 00000000..b6226bbc
--- /dev/null
+++ b/app/sentinel/background/initialization/connectivity.go
@@ -0,0 +1,106 @@
+/*
+|    Protect your secrets, protect your sensitive data.
+:    Explore VMware Secrets Manager docs at https://vsecm.com/
+</
+<>/  keep your secrets... secret
+>/
+<>/' Copyright 2023-present VMware Secrets Manager contributors.
+>/'  SPDX-License-Identifier: BSD-2-Clause
+*/
+
+package initialization
+
+import (
+	"context"
+	"github.com/pkg/errors"
+	"time"
+
+	"github.com/vmware-tanzu/secrets-manager/app/sentinel/internal/safe"
+	"github.com/vmware-tanzu/secrets-manager/core/backoff"
+	"github.com/vmware-tanzu/secrets-manager/core/env"
+	log "github.com/vmware-tanzu/secrets-manager/core/log/std"
+	"github.com/vmware-tanzu/secrets-manager/core/spiffe"
+)
+
+// ensureApiConnectivity blocks until a source is acquired and safe.Check
+// passes; with terminate-on-failure enabled, a failed attempt panics instead.
+func ensureApiConnectivity(ctx context.Context, cid *string) {
+	panicOnFailure := env.TerminateSentinelOnInitCommandConnectivityFailure()
+
+	log.TraceLn(cid, "Before checking api connectivity")
+
+	probe := func() error {
+		log.TraceLn(cid, "RunInitCommands:CheckConnectivity: checking connectivity to safe")
+
+		source, ok := spiffe.AcquireSourceForSentinel(ctx)
+		if !ok {
+			log.TraceLn(cid, "RunInitCommands:CheckConnectivity: failed to acquire source.")
+			if panicOnFailure {
+				panic("RunInitCommands:CheckConnectivity: failed to acquire source")
+			}
+			return errors.New("RunInitCommands:CheckConnectivity: failed to acquire source")
+		}
+
+		log.TraceLn(cid, "RunInitCommands:CheckConnectivity: acquired source successfully")
+
+		checkErr := safe.Check(ctx, source)
+		if checkErr == nil {
+			log.TraceLn(cid, "RunInitCommands:CheckConnectivity: success")
+			return nil
+		}
+
+		log.TraceLn(cid, "RunInitCommands:CheckConnectivity: failed to verify connection to safe:", checkErr.Error())
+		if panicOnFailure {
+			panic("RunInitCommands:CheckConnectivity: failed to verify connection to safe")
+		}
+		return errors.Wrap(checkErr, "RunInitCommands:CheckConnectivity: cannot establish connection to safe 001")
+	}
+
+	// Repeat whole backoff rounds until one of them reports success.
+	for {
+		if backoff.Retry("RunInitCommands:CheckConnectivity", probe, backoffStrategy()) == nil {
+			log.TraceLn(cid, "exiting backoffs")
+			return
+		}
+	}
+}
+
+// ensureSourceAcquisition sleeps for the configured wait interval and then
+// blocks until spiffe.AcquireSourceForSentinel reports success. The acquired
+// source itself is discarded. Retries use backoffStrategy(), the same policy
+// as the other init steps, instead of a private copy of those settings. With
+// terminate-on-failure enabled, a failed attempt panics rather than retries.
+func ensureSourceAcquisition(ctx context.Context, cid *string) {
+	// If `true`, instead of retrying with a backoff, kill the pod, and let the
+	// deployment controller restart it to initiate a new retry.
+	terminateAsap := env.TerminateSentinelOnInitCommandConnectivityFailure()
+
+	waitInterval := env.InitCommandRunnerWaitIntervalForSentinel()
+	time.Sleep(waitInterval)
+
+	for {
+		log.TraceLn(cid, "RunInitCommands: acquiring source 001")
+
+		s := backoffStrategy()
+
+		err := backoff.Retry("RunInitCommands:AcquireSource", func() error {
+			log.TraceLn(cid, "RunInitCommands:AcquireSource: acquireSourceForSentinel: 000")
+			_, acquired := spiffe.AcquireSourceForSentinel(ctx)
+			if !acquired {
+				log.TraceLn(cid, "RunInitCommands:AcquireSource: failed to acquire source.")
+				if terminateAsap {
+					panic("RunInitCommands:AcquireSource: failed to acquire source")
+				}
+
+				return errors.New("RunInitCommands:AcquireSource: failed to acquire source 000")
+			}
+
+			return nil
+		}, s)
+
+		if err == nil {
+			log.TraceLn(cid, "RunInitCommands:AcquireSource: got source. breaking.")
+			break
+		}
+	}
+}
diff --git a/app/sentinel/background/initialization/connectivity_test.go b/app/sentinel/background/initialization/connectivity_test.go
new file mode 100644
index 00000000..5c81ee09
--- /dev/null
+++ b/app/sentinel/background/initialization/connectivity_test.go
@@ -0,0 +1,11 @@
+/*
+|    Protect your secrets, protect your sensitive data.
+:    Explore VMware Secrets Manager docs at https://vsecm.com/
+</
+<>/  keep your secrets... secret
+>/
+<>/' Copyright 2023-present VMware Secrets Manager contributors.
+>/'  SPDX-License-Identifier: BSD-2-Clause
+*/
+
+package initialization
diff --git a/app/sentinel/background/initialization/io.go b/app/sentinel/background/initialization/io.go
new file mode 100644
index 00000000..789a5027
--- /dev/null
+++ b/app/sentinel/background/initialization/io.go
@@ -0,0 +1,178 @@
+/*
+|    Protect your secrets, protect your sensitive data.
+:    Explore VMware Secrets Manager docs at https://vsecm.com/
+</
+<>/  keep your secrets... secret
+>/
+<>/' Copyright 2023-present VMware Secrets Manager contributors.
+>/'  SPDX-License-Identifier: BSD-2-Clause
+*/
+
+package initialization
+
+import (
+	"bufio"
+	"context"
+	"os"
+	"strconv"
+	"strings"
+
+	"github.com/vmware-tanzu/secrets-manager/core/backoff"
+	entity "github.com/vmware-tanzu/secrets-manager/core/entity/data/v1"
+	"github.com/vmware-tanzu/secrets-manager/core/env"
+	log "github.com/vmware-tanzu/secrets-manager/core/log/std"
+)
+
+// commandFileScanner returns a line scanner over the init commands file, or
+// nil when the file cannot be read.
+//
+// The file content is loaded into memory first. Returning a scanner over an
+// *os.File that a defer in this function closes would hand the caller a
+// scanner whose underlying file is already closed.
+func commandFileScanner(cid *string) *bufio.Scanner {
+	filePath := env.InitCommandPathForSentinel()
+	data, err := os.ReadFile(filePath)
+
+	if err != nil {
+		log.InfoLn(
+			cid,
+			"RunInitCommands: no initialization file found... skipping custom initialization.",
+		)
+		return nil
+	}
+
+	log.TraceLn(cid, "Before parsing commands")
+
+	// The scanner reads from the in-memory copy, so no file handle outlives
+	// this function.
+	return bufio.NewScanner(strings.NewReader(string(data)))
+}
+
+func parseCommandsFile(ctx context.Context, cid *string, scanner *bufio.Scanner) {
+	log.TraceLn(cid, "Before parsing commands")
+	// sc accumulates the fields of the command block currently being read.
+	sc := entity.SentinelCommand{}
+	terminateAsap := env.TerminateSentinelOnInitCommandConnectivityFailure()
+	// Without a scanner there is nothing to parse: panic or return, per terminateAsap.
+	if scanner == nil {
+		if terminateAsap {
+			log.ErrorLn(cid, "RunInitCommands: error scanning commands file")
+			panic("RunInitCommands: error scanning commands file")
+		}
+
+		return
+	}
+
+dance:
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		log.TraceLn(cid, "line:", line)
+
+		if line == "" {
+			continue
+		}
+
+		parts := strings.SplitN(line, separator, 2)
+		// Skip lines that are neither a key/value pair nor the block delimiter.
+		if len(parts) != 2 && line != delimiter {
+			continue
+		}
+
+		if line == delimiter {
+			log.TraceLn(cid, "scanner: delimiter found")
+			if sc.ShouldSleep { // a sleep block only pauses; nothing is submitted
+				doSleep(sc.SleepIntervalMs)
+				sc = entity.SentinelCommand{}
+				continue
+			}
+			// Submit the accumulated block, retrying with the shared backoff strategy.
+			s := backoffStrategy()
+
+			err := backoff.Retry("RunInitCommands:ProcessCommandBlock", func() error {
+				log.TraceLn(
+					cid,
+					"RunInitCommands:ProcessCommandBlock: processCommandBlock: retrying with exponential backoff",
+				)
+
+				err := processCommandBlock(ctx, sc)
+				if err != nil {
+					log.ErrorLn(
+						cid,
+						"RunInitCommands:ProcessCommandBlock:error:",
+						err.Error(),
+					)
+					if terminateAsap {
+						panic("RunInitCommands:ProcessCommandBlock failed")
+					}
+				}
+				return err
+			}, s)
+			// Unless terminateAsap panics, a failed block is logged and then dropped.
+			if err != nil {
+				log.ErrorLn(
+					cid,
+					"RunInitCommands: error processing command block: ",
+					err.Error(),
+				)
+				if terminateAsap {
+					panic("RunInitCommands: error processing command block")
+				}
+			}
+
+			log.TraceLn(cid, "scanner: after delimiter")
+
+			sc = entity.SentinelCommand{}
+			continue
+		}
+
+		log.TraceLn(cid, "command found")
+
+		key := parts[0]
+		value := parts[1]
+		// NOTE(review): value may hold secret material and is written to the trace log here.
+		log.TraceLn(cid, "key", key, "value", value)
+		// Record the value on sc; the block is submitted at the next delimiter.
+		switch command(key) {
+		case exit:
+			// exit.
+			log.InfoLn(
+				cid,
+				"exit found during initialization.",
+				"skipping the rest of the commands.",
+				"skipping post initialization.",
+			)
+			// Move out of the loop to allow the keystone secret to be registered.
+			break dance
+		case workload:
+			sc.WorkloadIds = strings.SplitN(value, itemSeparator, -1)
+		case namespace:
+			sc.Namespaces = strings.SplitN(value, itemSeparator, -1)
+		case secret:
+			sc.Secret = value
+		case transformation:
+			sc.Template = value
+		case sleep:
+			sc.ShouldSleep = true
+			intms, err := strconv.Atoi(value)
+			if err != nil {
+				log.ErrorLn(cid, "RunInitCommands: Error parsing sleep interval: ", err.Error())
+			}
+			sc.SleepIntervalMs = intms // strconv.Atoi yields 0 when parsing failed
+		default:
+			log.InfoLn(cid, "RunInitCommands: unknown command: ", key)
+		}
+	}
+
+	log.TraceLn(cid, "scan finished")
+
+	if err := scanner.Err(); err != nil {
+		log.ErrorLn(
+			cid,
+			"RunInitCommands: Error reading initialization file: ",
+			err.Error(),
+		)
+		if terminateAsap {
+			panic("RunInitCommands: Error reading initialization file")
+		}
+	}
+}
diff --git a/app/sentinel/background/initialization/io_test.go b/app/sentinel/background/initialization/io_test.go
new file mode 100644
index 00000000..5c81ee09
--- /dev/null
+++ b/app/sentinel/background/initialization/io_test.go
@@ -0,0 +1,11 @@
+/*
+|    Protect your secrets, protect your sensitive data.
+:    Explore VMware Secrets Manager docs at https://vsecm.com/
+</
+<>/  keep your secrets... secret
+>/
+<>/' Copyright 2023-present VMware Secrets Manager contributors.
+>/'  SPDX-License-Identifier: BSD-2-Clause
+*/
+
+package initialization
diff --git a/app/sentinel/background/initialization/keystone.go b/app/sentinel/background/initialization/keystone.go
new file mode 100644
index 00000000..42486618
--- /dev/null
+++ b/app/sentinel/background/initialization/keystone.go
@@ -0,0 +1,54 @@
+/*
+|    Protect your secrets, protect your sensitive data.
+:    Explore VMware Secrets Manager docs at https://vsecm.com/
+</
+<>/  keep your secrets... secret
+>/
+<>/' Copyright 2023-present VMware Secrets Manager contributors.
+>/'  SPDX-License-Identifier: BSD-2-Clause
+*/
+
+package initialization
+
+import (
+	"context"
+
+	"github.com/vmware-tanzu/secrets-manager/core/backoff"
+	entity "github.com/vmware-tanzu/secrets-manager/core/entity/data/v1"
+	"github.com/vmware-tanzu/secrets-manager/core/env"
+	log "github.com/vmware-tanzu/secrets-manager/core/log/std"
+)
+
+// markKeystone runs processCommandBlock for the "keystone-init" secret of the
+// "vsecm-keystone" workload in the "vsecm-system" namespace, retrying with
+// the shared backoff strategy. It reports whether that succeeded; when
+// terminate-on-failure is enabled, a failure panics instead.
+func markKeystone(ctx context.Context, cid *string) bool {
+	panicOnFailure := env.TerminateSentinelOnInitCommandConnectivityFailure()
+
+	setKeystoneSecret := func() error {
+		log.TraceLn(cid, "RunInitCommands:MarkKeystone: retrying with exponential backoff")
+
+		// Assign a secret for VSecM Keystone
+		keystoneErr := processCommandBlock(ctx, entity.SentinelCommand{
+			WorkloadIds: []string{"vsecm-keystone"},
+			Namespaces:  []string{"vsecm-system"},
+			Secret:      "keystone-init",
+		})
+		if keystoneErr != nil && panicOnFailure {
+			panic("RunInitCommands: error setting keystone secret")
+		}
+		return keystoneErr
+	}
+
+	retryErr := backoff.Retry("RunInitCommands:MarkKeystone", setKeystoneSecret, backoffStrategy())
+	if retryErr == nil {
+		return true
+	}
+
+	log.ErrorLn(cid, "RunInitCommands: error setting keystone secret: ", retryErr.Error())
+	if panicOnFailure {
+		panic("RunInitCommands: error setting keystone secret")
+	}
+	return false
+}
diff --git a/app/sentinel/background/initialization/keystone_test.go b/app/sentinel/background/initialization/keystone_test.go
new file mode 100644
index 00000000..5c81ee09
--- /dev/null
+++ b/app/sentinel/background/initialization/keystone_test.go
@@ -0,0 +1,11 @@
+/*
+|    Protect your secrets, protect your sensitive data.
+:    Explore VMware Secrets Manager docs at https://vsecm.com/
+</
+<>/  keep your secrets... secret
+>/
+<>/' Copyright 2023-present VMware Secrets Manager contributors.
+>/'  SPDX-License-Identifier: BSD-2-Clause
+*/
+
+package initialization
diff --git a/app/sentinel/background/initialization/run.go b/app/sentinel/background/initialization/run.go
index 729ad067..03728c01 100644
--- a/app/sentinel/background/initialization/run.go
+++ b/app/sentinel/background/initialization/run.go
@@ -11,21 +11,12 @@
 package initialization
 
 import (
-	"bufio"
 	"context"
-	"fmt"
-	"github.com/pkg/errors"
-	"strconv"
-	"strings"
 	"time"
 
 	"github.com/vmware-tanzu/secrets-manager/app/sentinel/internal/safe"
-	"github.com/vmware-tanzu/secrets-manager/core/backoff"
-	entity "github.com/vmware-tanzu/secrets-manager/core/entity/data/v1"
 	"github.com/vmware-tanzu/secrets-manager/core/env"
 	log "github.com/vmware-tanzu/secrets-manager/core/log/std"
-	"github.com/vmware-tanzu/secrets-manager/core/spiffe"
-	"os"
 )
 
 // RunInitCommands reads and processes initialization commands from a file.
@@ -56,308 +47,39 @@ import (
 // returns early. Errors encountered while reading the file or closing it are
 // logged as errors.
 func RunInitCommands(ctx context.Context) {
-	// If `true`, instead of retrying with a backoff, kill the pod, and let the
-	// deployment controller restart it to initiate a new retry.
-	terminateAsap := env.TerminateSentinelOnInitCommandConnectivityFailure()
-
 	cid := ctx.Value("correlationId").(*string)
 
-	waitInterval := env.InitCommandRunnerWaitIntervalForSentinel()
-	time.Sleep(waitInterval)
-
-	// TODO: remove InitCommandRunnerWaitTimeoutForSentinel()
-	// we don't need it. If the init commands cannot run either the
-	// commands are corrupt (then it needs fixing) or there is a
-	// connectivity issue (then it needs retry).
-	// Init commands should be reliable, NOT half-baked.
-
-	for {
-		log.TraceLn(cid, "RunInitCommands: acquiring source 001")
-
-		s := backoff.Strategy{
-			MaxRetries:  20,
-			Delay:       1000,
-			Exponential: true,
-			MaxDuration: 30 * time.Second,
-		}
-
-		err := backoff.Retry("RunInitCommands:AcquireSource", func() error {
-			log.TraceLn(cid, "RunInitCommands:AcquireSource: acquireSourceForSentinel: 000")
-			_, acquired := spiffe.AcquireSourceForSentinel(ctx)
-			if !acquired {
-				log.TraceLn(cid, "RunInitCommands:AcquireSource: failed to acquire source.")
-				if terminateAsap {
-					panic("RunInitCommands:AcquireSource: failed to acquire source")
-				}
-
-				return errors.New("RunInitCommands:AcquireSource: failed to acquire source 000")
-			}
-
-			return nil
-		}, s)
-
-		if err == nil {
-			log.TraceLn(cid, "RunInitCommands:AcquireSource: got source. breaking.")
-			break
-		}
-	}
-
-	// Now, we are sure that we can acquire a source.
-	// Try to do a fetch with the source.
-
-	log.TraceLn(cid, "Before checking api connectivity")
-
-	for {
-		s := backoff.Strategy{
-			MaxRetries:  20,
-			Delay:       1000,
-			Exponential: true,
-			MaxDuration: 30 * time.Second,
-		}
-
-		err := backoff.Retry("RunInitCommands:CheckConnectivity", func() error {
-			log.TraceLn(cid, "RunInitCommands:CheckConnectivity: checking connectivity to safe")
-
-			src, acquired := spiffe.AcquireSourceForSentinel(ctx)
-			if !acquired {
-				log.TraceLn(cid, "RunInitCommands:CheckConnectivity: failed to acquire source.")
-				if terminateAsap {
-					panic("RunInitCommands:CheckConnectivity: failed to acquire source")
-				}
-
-				return errors.New("RunInitCommands:CheckConnectivity: failed to acquire source")
-			}
-
-			log.TraceLn(cid, "RunInitCommands:CheckConnectivity: acquired source successfully")
-
-			if err := safe.Check(ctx, src); err != nil {
-				log.TraceLn(cid, "RunInitCommands:CheckConnectivity: failed to verify connection to safe:", err.Error())
-				if terminateAsap {
-					panic("RunInitCommands:CheckConnectivity: failed to verify connection to safe")
-				}
-
-				return errors.Wrap(err, "RunInitCommands:CheckConnectivity: cannot establish connection to safe 001")
-			}
-
-			log.TraceLn(cid, "RunInitCommands:CheckConnectivity: success")
-			return nil
-		}, s)
-
-		if err == nil {
-			log.TraceLn(cid, "exiting backoffs")
-			break
-		}
-	}
-
-	// Now we know that we can establish a connection to VSecM Safe.
-	// We can safely run init commands.
-
-	log.TraceLn(cid, "checking tombstone file")
-
-	// Parse tombstone file first:
-	tombstonePath := env.InitCommandTombstonePathForSentinel()
-	file, err := os.Open(tombstonePath)
-	if err != nil {
-		log.InfoLn(
-			cid,
-			"RunInitCommands: no tombstone file found... skipping custom initialization.",
-		)
-		return
-	}
-
-	defer func(file *os.File) {
-		err := file.Close()
-		if err != nil {
-			log.ErrorLn(cid, "Error closing tombstone file: ", err.Error())
-		}
-	}(file)
-
-	data, err := os.ReadFile(tombstonePath)
-
-	log.InfoLn(cid, fmt.Sprintf("tombstone:'%s'", string(data)))
-
-	if strings.TrimSpace(string(data)) == "complete" {
-		log.InfoLn(
-			cid,
-			"RunInitCommands: Already initialized. Skipping custom initialization.",
-		)
-		return
-	}
-
-	filePath := env.InitCommandPathForSentinel()
-	file, err = os.Open(filePath)
-
-	if err != nil {
-		log.InfoLn(
-			cid,
-			"RunInitCommands: no initialization file found... skipping custom initialization.",
-		)
+	// No need to proceed if initialization has been completed already.
+	if !initCommandsExecutedAlready(cid) {
 		return
 	}
 
-	defer func(file *os.File) {
-		err := file.Close()
-		if err != nil {
-			log.ErrorLn(cid, "RunInitCommands: Error closing initialization file: ", err.Error())
-		}
-	}(file)
-
-	scanner := bufio.NewScanner(file)
-	var sc entity.SentinelCommand
-
-	log.TraceLn(cid, "Before parsing commands")
-
-dance:
-	for scanner.Scan() {
-		line := strings.TrimSpace(scanner.Text())
-		log.TraceLn(cid, "line:", line)
-
-		if line == "" {
-			continue
-		}
-
-		parts := strings.SplitN(line, separator, 2)
-
-		if len(parts) != 2 && line != delimiter {
-			continue
-		}
-
-		if line == delimiter {
-			log.TraceLn(cid, "scanner: delimiter found")
-			if sc.ShouldSleep {
-				doSleep(sc.SleepIntervalMs)
-				sc = entity.SentinelCommand{}
-				continue
-			}
-
-			// TODO: get some of these from env vars.
-			s := backoff.Strategy{
-				MaxRetries:  20,
-				Delay:       1000,
-				Exponential: true,
-				MaxDuration: 30 * time.Second,
-			}
-
-			err = backoff.Retry("RunInitCommands:ProcessCommandBlock", func() error {
-				log.TraceLn(
-					cid,
-					"RunInitCommands:ProcessCommandBlock: processCommandBlock: retrying with exponential backoff",
-				)
-
-				err := processCommandBlock(ctx, sc)
-				if err != nil {
-					log.ErrorLn(
-						cid,
-						"RunInitCommands:ProcessCommandBlock:error:",
-						err.Error(),
-					)
-					if terminateAsap {
-						panic("RunInitCommands:ProcessCommandBlock failed")
-					}
-				}
-				return err
-			}, s)
-
-			if err != nil {
-				log.ErrorLn(
-					cid,
-					"RunInitCommands: error processing command block: ",
-					err.Error(),
-				)
-				if terminateAsap {
-					panic("RunInitCommands: error processing command block")
-				}
-			}
-
-			log.TraceLn(cid, "scanner: after delimiter")
-
-			sc = entity.SentinelCommand{}
-			continue
-		}
-
-		log.TraceLn(cid, "command found")
-
-		key := parts[0]
-		value := parts[1]
-
-		log.TraceLn(cid, "key", key, "value", value)
-
-		switch command(key) {
-		case exit:
-			// exit.
-			log.InfoLn(
-				cid,
-				"exit found during initialization.",
-				"skipping the rest of the commands.",
-				"skipping post initialization.",
-			)
-			// Move out of the loop to allow the keystone secret to be registered.
-			break dance
-		case workload:
-			sc.WorkloadIds = strings.SplitN(value, itemSeparator, -1)
-		case namespace:
-			sc.Namespaces = strings.SplitN(value, itemSeparator, -1)
-		case secret:
-			sc.Secret = value
-		case transformation:
-			sc.Template = value
-		case sleep:
-			sc.ShouldSleep = true
-			intms, err := strconv.Atoi(value)
-			if err != nil {
-				log.ErrorLn(cid, "RunInitCommands: Error parsing sleep interval: ", err.Error())
-			}
-			sc.SleepIntervalMs = intms
-		default:
-			log.InfoLn(cid, "RunInitCommands: unknown command: ", key)
-		}
-	}
-
-	log.TraceLn(cid, "scan finished")
-
-	if err := scanner.Err(); err != nil {
-		log.ErrorLn(
-			cid,
-			"RunInitCommands: Error reading initialization file: ",
-			err.Error(),
-		)
-		if terminateAsap {
-			panic("RunInitCommands: Error reading initialization file")
-		}
-	}
-
-	// TODO: get some of these from env vars.
-	s := backoff.Strategy{
-		MaxRetries:  20,
-		Delay:       1000,
-		Exponential: true,
-		MaxDuration: 30 * time.Second,
-	}
+	// Ensure that we can acquire a source before proceeding.
+	ensureSourceAcquisition(ctx, cid)
+	// Now, we are sure that we can acquire a source.
+	// Try to do a VSecM Safe API request with the source.
+	ensureApiConnectivity(ctx, cid)
 
-	err = backoff.Retry("RunInitCommands:MarkKeystone", func() error {
-		log.TraceLn(cid, "RunInitCommands:MarkKeystone: retrying with exponential backoff")
+	// Now we know that we can establish a connection to VSecM Safe
+	// and execute API requests. So, we can safely run init commands.
 
-		// Assign a secret for VSecM Keystone
-		return processCommandBlock(ctx, entity.SentinelCommand{
-			WorkloadIds: []string{"vsecm-keystone"},
-			Namespaces:  []string{"vsecm-system"},
-			Secret:      "keystone-init",
-		})
-	}, s)
+	// Parse the commands file and execute the commands in it.
+	scanner := commandFileScanner(cid)
+	parseCommandsFile(ctx, cid, scanner)
 
-	if err != nil {
-		log.ErrorLn(cid, "RunInitCommands: error setting keystone secret: ", err.Error())
-		if terminateAsap {
-			panic("RunInitCommands: error setting keystone secret")
-		}
+	// Mark the keystone secret.
+	success := markKeystone(ctx, cid)
+	if !success {
+		// If we cannot set the keystone secret, we should not proceed.
 		return
 	}
 
 	// Wait before notifying Keystone. This way, if there are things that
 	// take time to reconcile, they have a chance to do so.
-	waitInterval = env.InitCommandRunnerWaitIntervalBeforeInitComplete()
+	waitInterval := env.InitCommandRunnerWaitIntervalBeforeInitComplete()
 	time.Sleep(waitInterval)
 
+	// Everything is set up. Mark the initialization as complete.
 	log.InfoLn(cid, "RunInitCommands: keystone secret set successfully.")
-	safe.MarkInitializationAsCompleted(ctx)
+	safe.MarkInitializationCompletion(ctx)
 }
diff --git a/app/sentinel/background/initialization/validation.go b/app/sentinel/background/initialization/validation.go
new file mode 100644
index 00000000..8f9bc3db
--- /dev/null
+++ b/app/sentinel/background/initialization/validation.go
@@ -0,0 +1,56 @@
+/*
+|    Protect your secrets, protect your sensitive data.
+:    Explore VMware Secrets Manager docs at https://vsecm.com/
+</
+<>/  keep your secrets... secret
+>/
+<>/' Copyright 2023-present VMware Secrets Manager contributors.
+>/'  SPDX-License-Identifier: BSD-2-Clause
+*/
+
+package initialization
+
+import (
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/vmware-tanzu/secrets-manager/core/env"
+	log "github.com/vmware-tanzu/secrets-manager/core/log/std"
+)
+
+// initCommandsExecutedAlready reports whether RunInitCommands should go on
+// to execute the init commands. Despite its name, it returns true only when
+// the tombstone file can be read and its trimmed content is not "complete".
+// It returns false when the tombstone cannot be read or already says
+// "complete"; RunInitCommands returns early in that case.
+func initCommandsExecutedAlready(cid *string) bool {
+	log.TraceLn(cid, "checking tombstone file")
+
+	// os.ReadFile opens, reads and closes the file in one call, so no separate
+	// handle needs to be kept open, and a read failure is no longer ignored.
+	tombstonePath := env.InitCommandTombstonePathForSentinel()
+	data, err := os.ReadFile(tombstonePath)
+	if err != nil {
+		log.InfoLn(
+			cid,
+			"RunInitCommands: no tombstone file found... skipping custom initialization.",
+		)
+		return false
+	}
+
+	tombstone := string(data)
+	log.InfoLn(cid, fmt.Sprintf("tombstone:'%s'", tombstone))
+
+	if strings.TrimSpace(tombstone) == "complete" {
+		log.InfoLn(
+			cid,
+			"RunInitCommands: Already initialized. Skipping custom initialization.",
+		)
+		return false
+	}
+
+	// The tombstone exists but is not marked complete, so the init commands
+	// still have to run.
+	return true
+}
diff --git a/app/sentinel/background/initialization/validation_test.go b/app/sentinel/background/initialization/validation_test.go
new file mode 100644
index 00000000..5c81ee09
--- /dev/null
+++ b/app/sentinel/background/initialization/validation_test.go
@@ -0,0 +1,11 @@
+/*
+|    Protect your secrets, protect your sensitive data.
+:    Explore VMware Secrets Manager docs at https://vsecm.com/
+</
+<>/  keep your secrets... secret
+>/
+<>/' Copyright 2023-present VMware Secrets Manager contributors.
+>/'  SPDX-License-Identifier: BSD-2-Clause
+*/
+
+package initialization
diff --git a/app/sentinel/internal/safe/post.go b/app/sentinel/internal/safe/post.go
index 07cdd747..468df7c6 100644
--- a/app/sentinel/internal/safe/post.go
+++ b/app/sentinel/internal/safe/post.go
@@ -172,7 +172,7 @@ func doPost(cid *string, client *http.Client, p string, md []byte) error {
 	return nil
 }
 
-// MarkInitializationAsCompleted is a function that signals the completion of a
+// MarkInitializationCompletion is a function that signals the completion of a
 // post-initialization process.
 // It takes a parent context as an argument and performs several steps involving
 // timeout management, source acquisition, error handling, and sending a
@@ -193,7 +193,7 @@ func doPost(cid *string, client *http.Client, p string, md []byte) error {
 // Parameters:
 //   - parentContext (context.Context): The parent context from which the function
 //     will derive its context.
-func MarkInitializationAsCompleted(parentContext context.Context) {
+func MarkInitializationCompletion(parentContext context.Context) {
 	ctxWithTimeout, cancel := context.WithTimeout(
 		parentContext,
 		env.SourceAcquisitionTimeoutForSafe(),
@@ -202,7 +202,7 @@ func MarkInitializationAsCompleted(parentContext context.Context) {
 
 	cid := ctxWithTimeout.Value("correlationId").(*string)
 
-	log.AuditLn(cid, "Sentinel:MarkInitializationAsCompleted")
+	log.AuditLn(cid, "Sentinel:MarkInitializationCompletion")
 
 	sourceChan := make(chan *workloadapi.X509Source)
 	proceedChan := make(chan bool)
diff --git a/docs/_pages/0110-configuration.md b/docs/_pages/0110-configuration.md
index b5c531ab..fec679d7 100644
--- a/docs/_pages/0110-configuration.md
+++ b/docs/_pages/0110-configuration.md
@@ -460,15 +460,6 @@ Defaults to `"/opt/vsecm-sentinel/tombstone/init"`.
 This path is usually used to store a "*tombstone*" file or data indicating that
 the initialization command has been executed or is no longer valid.
 
-### VSECM_SENTINEL_INIT_COMMAND_RUNNER_WAIT_TIMEOUT
-
-`VSECM_SENTINEL_INIT_COMMAND_RUNNER_WAIT_TIMEOUT` it the timeout duration for
-**VSecM Sentinel** to wait for **VSecM Safe** to be operational before 
-executing the initialization commands.
-
-If the environment variable is not set or invalid, a default timeout of
-`300000` milliseconds (5 minutes) is used.
-
 ### VSECM_SENTINEL_LOGGER_URL
 
 `VSECM_SENTINEL_LOGGER_URL` ise the URL for the **VSecM Sentinel** Logger.

From 93fd382cad246edcc660b485d8e6b8c1df1b04e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Volkan=20O=CC=88zc=CC=A7elik?= <ovolkan@vmware.com>
Date: Sat, 6 Apr 2024 00:34:57 -0700
Subject: [PATCH 2/3] format fix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Volkan Özçelik <ovolkan@vmware.com>
---
 app/keystone/cmd/main.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/keystone/cmd/main.go b/app/keystone/cmd/main.go
index ef562e1b..df4e2f8f 100644
--- a/app/keystone/cmd/main.go
+++ b/app/keystone/cmd/main.go
@@ -21,7 +21,7 @@ import (
 func main() {
 	log.Println(
 		"VSecM Keystone",
-		fmt.Sprint("v%s", os.Getenv("APP_VERSION")),
+		fmt.Sprintf("v%s", os.Getenv("APP_VERSION")),
 	)
 
 	// Run on the main thread to wait forever.

From 340b6602265ad87d09c937bb0d60fc09964350fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Volkan=20O=CC=88zc=CC=A7elik?= <ovolkan@vmware.com>
Date: Sat, 6 Apr 2024 01:32:42 -0700
Subject: [PATCH 3/3] add backoff to post
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Volkan Özçelik <ovolkan@vmware.com>
---
 app/sentinel/internal/safe/post.go | 41 +++++++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/app/sentinel/internal/safe/post.go b/app/sentinel/internal/safe/post.go
index 468df7c6..3725109d 100644
--- a/app/sentinel/internal/safe/post.go
+++ b/app/sentinel/internal/safe/post.go
@@ -15,7 +15,6 @@ import (
 	"context"
 	"encoding/json"
 	"github.com/pkg/errors"
-	"github.com/vmware-tanzu/secrets-manager/core/spiffe"
 	"io"
 	"net/http"
 	"net/url"
@@ -26,15 +25,19 @@ import (
 	"github.com/spiffe/go-spiffe/v2/spiffetls/tlsconfig"
 	"github.com/spiffe/go-spiffe/v2/workloadapi"
 
+	"github.com/vmware-tanzu/secrets-manager/core/backoff"
 	"github.com/vmware-tanzu/secrets-manager/core/crypto"
 	data "github.com/vmware-tanzu/secrets-manager/core/entity/data/v1"
 	entity "github.com/vmware-tanzu/secrets-manager/core/entity/data/v1"
 	reqres "github.com/vmware-tanzu/secrets-manager/core/entity/reqres/safe/v1"
 	"github.com/vmware-tanzu/secrets-manager/core/env"
 	log "github.com/vmware-tanzu/secrets-manager/core/log/rpc"
+	"github.com/vmware-tanzu/secrets-manager/core/spiffe"
 	"github.com/vmware-tanzu/secrets-manager/core/validation"
 )
 
+// TODO: move private fns to their own files.
+
 func createAuthorizer() tlsconfig.Authorizer {
 	return tlsconfig.AdaptMatcher(func(id spiffeid.ID) error {
 		if validation.IsSafe(id.String()) {
@@ -172,6 +175,17 @@ func doPost(cid *string, client *http.Client, p string, md []byte) error {
 	return nil
 }
 
+// backoffStrategy returns the retry policy used by the sentinel:post retry loop.
+// TODO: get some of these from env vars; this method is duplicated in multiple places. Refactor.
+func backoffStrategy() backoff.Strategy {
+	return backoff.Strategy{
+		MaxRetries:  20,
+		Delay:       1000, // NOTE(review): unit is not visible here, presumably milliseconds; confirm
+		Exponential: true,
+		MaxDuration: 30 * time.Second,
+	}
+}
+
 // MarkInitializationCompletion is a function that signals the completion of a
 // post-initialization process.
 // It takes a parent context as an argument and performs several steps involving
@@ -261,11 +275,32 @@ func MarkInitializationCompletion(parentContext context.Context) {
 			return
 		}
 
-		doPost(cid, client, p, md)
+		// Retry whole backoff rounds until one post succeeds, then stop.
+		for {
+			s := backoffStrategy()
+
+			err := backoff.Retry("sentinel:post", func() error {
+				log.TraceLn(cid, "sentinel:post")
+
+				err := doPost(cid, client, p, md)
+				if err != nil {
+					log.ErrorLn(
+						cid,
+						"sentinel:post: error:", err.Error(), "will retry.",
+					)
+				}
+
+				return err
+			}, s)
+
+			if err == nil {
+				return // posted successfully; continuing here would loop forever
+			}
+		}
 	}
 }
 
-var seed = time.Now().UnixNano()
+// var seed = time.Now().UnixNano()
 
 func Post(parentContext context.Context,
 	sc entity.SentinelCommand,