From b9508dab63c69a9c55ce6ddbb58e2694fc764efd Mon Sep 17 00:00:00 2001 From: Hamza El-Saawy Date: Mon, 21 Mar 2022 17:29:55 -0400 Subject: [PATCH] rework close Rather than changing the directory, have the ttrpc wait until cleanup is done Signed-off-by: Hamza El-Saawy --- cmd/containerd-shim-runhcs-v1/serve.go | 37 ++++++++----------- cmd/containerd-shim-runhcs-v1/service.go | 14 +++---- .../service_internal.go | 15 ++++++-- 3 files changed, 32 insertions(+), 34 deletions(-) diff --git a/cmd/containerd-shim-runhcs-v1/serve.go b/cmd/containerd-shim-runhcs-v1/serve.go index 367f46fc8d..bbb5bb4224 100644 --- a/cmd/containerd-shim-runhcs-v1/serve.go +++ b/cmd/containerd-shim-runhcs-v1/serve.go @@ -12,10 +12,6 @@ import ( "unsafe" "github.com/Microsoft/go-winio" - runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" - "github.com/Microsoft/hcsshim/internal/extendedtask" - "github.com/Microsoft/hcsshim/internal/shimdiag" - "github.com/Microsoft/hcsshim/pkg/octtrpc" "github.com/containerd/containerd/log" "github.com/containerd/containerd/runtime/v2/task" "github.com/containerd/ttrpc" @@ -26,6 +22,11 @@ import ( "github.com/sirupsen/logrus" "github.com/urfave/cli" "golang.org/x/sys/windows" + + runhcsopts "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options" + "github.com/Microsoft/hcsshim/internal/extendedtask" + "github.com/Microsoft/hcsshim/internal/shimdiag" + "github.com/Microsoft/hcsshim/pkg/octtrpc" ) var svc *service @@ -240,30 +241,24 @@ var serveCommand = cli.Command{ case err = <-serrs: // the ttrpc server shutdown without processing a shutdown request case <-svc.Done(): - if !svc.gracefulShutdown { - // Return immediately, but still close ttrpc server, pipes, and spans - // Shouldn't need to os.Exit without clean up (ie, deferred `.Close()`s) - return nil - } - - // containerd waits for the ttrpc server to close, then deletes the sanbox - // bundle. + // containerd waits for the ttrpc server to respons to the shutdown request + // or to close, then deletes the sanbox bundle. // However, this (serve) command is started with the bundle path as the cwd // and panic.log (created within the sandbox bundle) as the stderr. This // prevents containerd from deleting the bundle if this process is still // alive. - // Change the directory to the parent and close stderr (panic.log) to - // allow bundle deletion to succeed. - if err := os.Chdir(".."); err != nil { - logrus.WithError(err).Warn("could not change working directory to bundle directory parent ") - } - os.Stderr.Close() + os.Stderr.Close() + // unlike linux, windows locks a process's working directory, so it + // cannot be deleted if the process is running + // Set wd to dir to finish up cleanup while containerd deletes bundle + if err := os.Chdir(os.TempDir()); err != nil { + logrus.WithError(err).Warning("error while changing working directory") + } sctx, cancel := context.WithTimeout(context.Background(), gracefulShutdownTimeout) defer cancel() - // ignore the error, since we closed stderr (and stdout) if err := s.Shutdown(sctx); err != nil { - logrus.WithError(err).Warning("error while shutting down ttrpc server") + logrus.WithError(err).Warning("error while closing ttrpc server") } } @@ -272,7 +267,7 @@ var serveCommand = cli.Command{ } func trapClosedConnErr(err error) error { - if err == nil || strings.Contains(err.Error(), "use of closed network connection") { + if err == nil || strings.Contains(err.Error(), "use of closed network connection") || errors.Is(err, ttrpc.ErrServerClosed) { return nil } return err diff --git a/cmd/containerd-shim-runhcs-v1/service.go b/cmd/containerd-shim-runhcs-v1/service.go index c1a080eff5..021373500b 100644 --- a/cmd/containerd-shim-runhcs-v1/service.go +++ b/cmd/containerd-shim-runhcs-v1/service.go @@ -9,13 +9,14 @@ import ( "sync/atomic" "time" - "github.com/Microsoft/hcsshim/internal/extendedtask" - "github.com/Microsoft/hcsshim/internal/oc" - "github.com/Microsoft/hcsshim/internal/shimdiag" "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/runtime/v2/task" google_protobuf1 "github.com/gogo/protobuf/types" "go.opencensus.io/trace" + + "github.com/Microsoft/hcsshim/internal/extendedtask" + "github.com/Microsoft/hcsshim/internal/oc" + "github.com/Microsoft/hcsshim/internal/shimdiag" ) type ServiceOptions struct { @@ -69,13 +70,8 @@ type service struct { // concurrently. cl sync.Mutex - // shutdown is closed to signal a shutdown request is received + // shutdown signals that the service has finished shutting down shutdown chan struct{} - // shutdownOnce is responsible for closing `shutdown` and any other necessary cleanup - shutdownOnce sync.Once - // gracefulShutdown dictates whether to shutdown gracefully and clean up resources - // or exit immediately - gracefulShutdown bool } var _ = (task.TaskService)(&service{}) diff --git a/cmd/containerd-shim-runhcs-v1/service_internal.go b/cmd/containerd-shim-runhcs-v1/service_internal.go index 0571a13cd3..ed98c22235 100644 --- a/cmd/containerd-shim-runhcs-v1/service_internal.go +++ b/cmd/containerd-shim-runhcs-v1/service_internal.go @@ -467,14 +467,21 @@ func (s *service) shutdownInternal(ctx context.Context, req *task.ShutdownReques return empty, nil } - s.shutdownOnce.Do(func() { + if req.Now { + os.Exit(0) + } + + var err error + select { + case <-s.shutdown: + // already closed + default: // TODO: should taskOrPod be deleted/set to nil? // TODO: is there any extra leftovers of the shimTask/Pod to clean? ie: verify all handles are closed? - s.gracefulShutdown = !req.Now close(s.shutdown) - }) + } - return empty, nil + return empty, err } func (s *service) computeProcessorInfoInternal(ctx context.Context, req *extendedtask.ComputeProcessorInfoRequest) (*extendedtask.ComputeProcessorInfoResponse, error) {