Skip to content

Commit

Permalink
feat: add liveness and readiness probe support (#69)
Browse files Browse the repository at this point in the history
Signed-off-by: Armando Ruocco <armando.ruocco@enterprisedb.com>
Signed-off-by: Leonardo Cecchi <leonardo.cecchi@enterprisedb.com>
Signed-off-by: Francesco Canovai <francesco.canovai@enterprisedb.com>
Co-authored-by: Leonardo Cecchi <leonardo.cecchi@enterprisedb.com>
Co-authored-by: Francesco Canovai <francesco.canovai@enterprisedb.com>
  • Loading branch information
3 people authored Dec 2, 2024
1 parent 9404772 commit 5fd9449
Show file tree
Hide file tree
Showing 8 changed files with 127 additions and 0 deletions.
2 changes: 2 additions & 0 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/spf13/cobra"
ctrl "sigs.k8s.io/controller-runtime"

"github.com/cloudnative-pg/plugin-barman-cloud/internal/cmd/healthcheck"
"github.com/cloudnative-pg/plugin-barman-cloud/internal/cmd/instance"
"github.com/cloudnative-pg/plugin-barman-cloud/internal/cmd/operator"
"github.com/cloudnative-pg/plugin-barman-cloud/internal/cmd/restore"
Expand All @@ -32,6 +33,7 @@ func main() {
rootCmd.AddCommand(instance.NewCmd())
rootCmd.AddCommand(operator.NewCmd())
rootCmd.AddCommand(restore.NewCmd())
rootCmd.AddCommand(healthcheck.NewCmd())

if err := rootCmd.ExecuteContext(ctrl.SetupSignalHandler()); err != nil {
if !errors.Is(err, context.Canceled) {
Expand Down
2 changes: 2 additions & 0 deletions internal/cmd/healthcheck/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Package healthcheck contains the logic to execute a health check on the plugin through a command
package healthcheck
75 changes: 75 additions & 0 deletions internal/cmd/healthcheck/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package healthcheck

import (
"fmt"
"os"
"path"

"github.com/cloudnative-pg/machinery/pkg/log"
"github.com/spf13/cobra"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/health/grpc_health_v1"

"github.com/cloudnative-pg/plugin-barman-cloud/internal/cnpgi/metadata"
)

// NewCmd builds the parent "healthcheck" command and attaches the
// available health check subcommands to it.
func NewCmd() *cobra.Command {
	root := new(cobra.Command)
	root.Use = "healthcheck"
	root.Short = "healthcheck commands"

	// The only transport currently exposed is the unix socket one.
	root.AddCommand(unixHealthCheck())

	return root
}

// unixHealthCheck builds the "unix" subcommand, which performs a single gRPC
// health check call against the unix socket located at
// /plugins/<metadata.PluginName> and reports the outcome through the process
// exit code:
//
//   - 0: the server answered SERVING
//   - 2: the server answered UNKNOWN
//   - 3: the server answered NOT_SERVING
//   - 125: the server answered with any other status
//
// When the client cannot be built or the call itself fails, the error is
// logged and returned to the caller instead of exiting directly; the caller
// is expected to turn it into exit code 1.
func unixHealthCheck() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "unix",
		Short: "executes the health check command on unix:///plugins/barman-cloud.cloudnative-pg.io",
		RunE: func(cmd *cobra.Command, _ []string) error {
			dialPath := fmt.Sprintf("unix://%s", path.Join("/plugins", metadata.PluginName))
			cli, cliErr := grpc.NewClient(dialPath, grpc.WithTransportCredentials(insecure.NewCredentials()))
			if cliErr != nil {
				log.Error(cliErr, "while building the client")
				return cliErr
			}

			healthCli := grpc_health_v1.NewHealthClient(cli)
			res, healthErr := healthCli.Check(
				cmd.Context(),
				&grpc_health_v1.HealthCheckRequest{},
			)

			// The connection is only needed for the single call above.
			// It is closed explicitly (not deferred) because the status
			// handling below terminates through os.Exit, which does not
			// run deferred functions.
			if closeErr := cli.Close(); closeErr != nil {
				log.Error(closeErr, "while closing the client")
			}

			if healthErr != nil {
				log.Error(healthErr, "while executing the healthcheck call")
				return healthErr
			}

			if res.Status == grpc_health_v1.HealthCheckResponse_SERVING {
				log.Trace("healthcheck response OK")
				os.Exit(0)
			}

			log.Error(fmt.Errorf("unexpected healthcheck status: %v", res.Status),
				"while processing healthcheck response")

			// exit code 1 is returned when we exit from the function with an error,
			// so the non-serving statuses use distinct codes starting from 2
			switch res.Status {
			case grpc_health_v1.HealthCheckResponse_UNKNOWN:
				os.Exit(2)
			case grpc_health_v1.HealthCheckResponse_NOT_SERVING:
				os.Exit(3)
			default:
				os.Exit(125)
			}

			return nil
		},
	}

	return cmd
}
28 changes: 28 additions & 0 deletions internal/cnpgi/common/health.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package common

import (
"context"

"github.com/cloudnative-pg/machinery/pkg/log"
"google.golang.org/grpc"
"google.golang.org/grpc/health/grpc_health_v1"
)

// AddHealthCheck registers the plugin's gRPC health service implementation
// on the given server, so that health check requests are answered by
// healthServer.
func AddHealthCheck(server *grpc.Server) {
	handler := &healthServer{}
	grpc_health_v1.RegisterHealthServer(server, handler)
}

// healthServer is the gRPC health service implementation registered by
// AddHealthCheck. It embeds UnimplementedHealthServer, so any method of the
// health service that is not defined on healthServer itself falls back to
// the embedded implementation.
type healthServer struct {
grpc_health_v1.UnimplementedHealthServer
}

// Check answers a health check request. It logs the call at trace level
// using the logger found in the context and always reports the SERVING
// status, regardless of the request content.
func (srv healthServer) Check(
	ctx context.Context,
	_ *grpc_health_v1.HealthCheckRequest,
) (*grpc_health_v1.HealthCheckResponse, error) {
	log.FromContext(ctx).Trace("serving health check response")

	response := &grpc_health_v1.HealthCheckResponse{
		Status: grpc_health_v1.HealthCheckResponse_SERVING,
	}
	return response, nil
}
1 change: 1 addition & 0 deletions internal/cnpgi/instance/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func (c *CNPGI) Start(ctx context.Context) error {
ClusterObjectKey: c.ClusterObjectKey,
InstanceName: c.InstanceName,
})
common.AddHealthCheck(server)
return nil
}

Expand Down
11 changes: 11 additions & 0 deletions internal/cnpgi/operator/lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,21 @@ func reconcilePodSpec(
},
}

baseProbe := &corev1.Probe{
FailureThreshold: 3,
ProbeHandler: corev1.ProbeHandler{
Exec: &corev1.ExecAction{
Command: []string{"manager", "healthcheck", "unix"},
},
},
}

// fixed values
sidecarConfig.Name = "plugin-barman-cloud"
sidecarConfig.Image = viper.GetString("sidecar-image")
sidecarConfig.ImagePullPolicy = cluster.Spec.ImagePullPolicy
sidecarConfig.LivenessProbe = baseProbe.DeepCopy()
sidecarConfig.StartupProbe = baseProbe.DeepCopy()

// merge the main container envs if they aren't already set
for _, container := range spec.Containers {
Expand Down
3 changes: 3 additions & 0 deletions internal/cnpgi/restore/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ func (c *CNPGI) Start(ctx context.Context) error {
PgDataPath: c.PGDataPath,
PgWalFolderToSymlink: PgWalVolumePgWalPath,
})

common.AddHealthCheck(server)

return nil
}

Expand Down
5 changes: 5 additions & 0 deletions kubernetes/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@ spec:
- --server-address=:9090
- --leader-elect
- --log-level=debug
readinessProbe:
tcpSocket:
port: 9090
initialDelaySeconds: 10
periodSeconds: 10
volumeMounts:
- mountPath: /server
name: server
Expand Down

0 comments on commit 5fd9449

Please sign in to comment.