Skip to content

Commit

Permalink
fix: Graceful shutdown for the API server (argoproj#18642)
Browse files Browse the repository at this point in the history
Closes argoproj#18642

Implements a graceful shutdown the the API server. Without this, ArgoCD API server will eventually return 502 during rolling update.

Signed-off-by: Andrii Korotkov <andrii.korotkov@verkada.com>
Co-authored-by: Leonardo Luz Almeida <leonardo_almeida@intuit.com>
Co-authored-by: Michael Crenshaw <350466+crenshaw-dev@users.noreply.github.com>
  • Loading branch information
3 people committed Nov 28, 2024
1 parent 8bce61e commit 166e17c
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 22 deletions.
3 changes: 3 additions & 0 deletions cmd/argocd-server/commands/argocd_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,9 @@ func NewCommand() *cobra.Command {
if closer != nil {
closer()
}
if argocd.ReceivedSignal() {
break
}
}
},
Example: templates.Examples(`
Expand Down
126 changes: 104 additions & 22 deletions server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@ import (
"net/url"
"os"
"os/exec"
"os/signal"
"path"
"path/filepath"
"reflect"
"regexp"
go_runtime "runtime"
"strings"
gosync "sync"
"syscall"
"time"

// nolint:staticcheck
Expand Down Expand Up @@ -187,7 +189,7 @@ type ArgoCDServer struct {
db db.ArgoDB

// stopCh is the channel which when closed, will shutdown the Argo CD server
stopCh chan struct{}
stopCh chan os.Signal
userStateStorage util_session.UserStateStorage
indexDataInit gosync.Once
indexData []byte
Expand All @@ -198,6 +200,8 @@ type ArgoCDServer struct {
configMapInformer cache.SharedIndexInformer
serviceSet *ArgoCDServiceSet
extensionManager *extension.Manager
shutdown func()
receivedSignal bool
}

type ArgoCDServerOpts struct {
Expand Down Expand Up @@ -329,6 +333,9 @@ func NewServer(ctx context.Context, opts ArgoCDServerOpts, appsetOpts Applicatio
pg := extension.NewDefaultProjectGetter(projLister, dbInstance)
ug := extension.NewDefaultUserGetter(policyEnf)
em := extension.NewManager(logger, opts.Namespace, sg, ag, pg, enf, ug)
noopShutdown := func() {
log.Error("API Server Shutdown function called but server is not started yet.")
}

a := &ArgoCDServer{
ArgoCDServerOpts: opts,
Expand All @@ -352,6 +359,8 @@ func NewServer(ctx context.Context, opts ArgoCDServerOpts, appsetOpts Applicatio
secretInformer: secretInformer,
configMapInformer: configMapInformer,
extensionManager: em,
shutdown: noopShutdown,
receivedSignal: false,
}

err = a.logInClusterWarnings()
Expand Down Expand Up @@ -601,35 +610,108 @@ func (a *ArgoCDServer) Run(ctx context.Context, listeners *Listeners) {
log.Fatal("Timed out waiting for project cache to sync")
}

a.stopCh = make(chan struct{})
<-a.stopCh
shutdownFunc := func() {
log.Info("API Server shutdown initiated. Shutting down servers...")
sCtx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
defer cancel()
var wg gosync.WaitGroup

// Shutdown http server
wg.Add(1)
go func() {
defer wg.Done()
err := httpS.Shutdown(sCtx)
if err != nil {
log.Errorf("Error shutting down http server: %s", err)
}
}()

if httpsS != nil {
// Shutdown https server
wg.Add(1)
go func() {
defer wg.Done()
err := httpsS.Shutdown(sCtx)
if err != nil {
log.Errorf("Error shutting down https server: %s", err)
}
}()
}

// Shutdown gRPC server
wg.Add(1)
go func() {
defer wg.Done()
grpcS.GracefulStop()
}()

// Shutdown metrics server
wg.Add(1)
go func() {
defer wg.Done()
err := metricsServ.Shutdown(sCtx)
if err != nil {
log.Errorf("Error shutting down metrics server: %s", err)
}
}()

if tlsm != nil {
// Shutdown tls server
wg.Add(1)
go func() {
defer wg.Done()
tlsm.Close()
}()
}

// Shutdown tcp server
wg.Add(1)
go func() {
defer wg.Done()
tcpm.Close()
}()

c := make(chan struct{})
// This goroutine will wait for all servers to conclude the shutdown
// process
go func() {
defer close(c)
wg.Wait()
}()

select {
case <-c:
log.Info("All servers were gracefully shutdown. Exiting...")
case <-sCtx.Done():
log.Warn("Graceful shutdown timeout. Exiting...")
}
}
a.shutdown = shutdownFunc

a.stopCh = make(chan os.Signal, 1)
signal.Notify(a.stopCh, os.Interrupt, syscall.SIGINT, syscall.SIGTERM)
signal := <-a.stopCh
log.Infof("API Server received signal: %s", signal.String())
a.receivedSignal = true
a.shutdown()
}

func (a *ArgoCDServer) Initialized() bool {
return a.projInformer.HasSynced() && a.appInformer.HasSynced()
}

// ReceivedSignal returns whether a shutdown was initiated by a signal as opposed to
// a watch.
func (a *ArgoCDServer) ReceivedSignal() bool {
return a.receivedSignal
}

// checkServeErr checks the error from a .Serve() call to decide if it was a graceful shutdown
func (a *ArgoCDServer) checkServeErr(name string, err error) {
if err != nil {
if a.stopCh == nil {
// a nil stopCh indicates a graceful shutdown
log.Infof("graceful shutdown %s: %v", name, err)
} else {
log.Fatalf("%s: %v", name, err)
}
if err != nil && !errors.Is(err, http.ErrServerClosed) {
log.Errorf("Error received from server %s: %v", name, err)
} else {
log.Infof("graceful shutdown %s", name)
}
}

// Shutdown stops the Argo CD server
func (a *ArgoCDServer) Shutdown() {
log.Info("Shut down requested")
stopCh := a.stopCh
a.stopCh = nil
if stopCh != nil {
close(stopCh)
log.Infof("Graceful shutdown of %s initiated", name)
}
}

Expand Down Expand Up @@ -734,7 +816,7 @@ func (a *ArgoCDServer) watchSettings() {
}
}
log.Info("shutting down settings watch")
a.Shutdown()
a.shutdown()
a.settingsMgr.Unsubscribe(updateCh)
close(updateCh)
}
Expand Down

0 comments on commit 166e17c

Please sign in to comment.