From bb34eb20c765d1a7edff2b4d0fb75ca4f852d726 Mon Sep 17 00:00:00 2001 From: Wei Fu Date: Sat, 12 Oct 2019 14:19:10 +0800 Subject: [PATCH 1/4] open debug mode to take over shim standard output Signed-off-by: Wei Fu --- daemon/daemon.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/daemon/daemon.go b/daemon/daemon.go index 04b8e1a96..19acd8c1e 100644 --- a/daemon/daemon.go +++ b/daemon/daemon.go @@ -81,9 +81,10 @@ func NewDaemon(cfg *config.Config) *Daemon { if cfg.Debug { ctrdDaemonOpts = append(ctrdDaemonOpts, supervisord.WithLogLevel("debug")) - ctrdDaemonOpts = append(ctrdDaemonOpts, supervisord.WithV1RuntimeShimDebug()) } + ctrdDaemonOpts = append(ctrdDaemonOpts, supervisord.WithV1RuntimeShimDebug()) + ctrdDaemon, err := supervisord.Start(context.TODO(), filepath.Join(cfg.HomeDir, "containerd/root"), filepath.Join(cfg.HomeDir, "containerd/state"), From e8ee069bf63657c9022649dda3cb0fa605dea9c0 Mon Sep 17 00:00:00 2001 From: Wei Fu Date: Thu, 31 Oct 2019 12:58:18 +0800 Subject: [PATCH 2/4] ctrd: try to unpack image when create snapshot PouchContainer always unpacks for pulled images. But if there is any crash or terminated, the unpack action will be stopped. And image metadata is stored in containerd. It will cause that the following create container request fails. Try to unpack it if fail. Signed-off-by: Wei Fu --- ctrd/snapshot.go | 50 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/ctrd/snapshot.go b/ctrd/snapshot.go index c0a9815eb..0db936636 100644 --- a/ctrd/snapshot.go +++ b/ctrd/snapshot.go @@ -4,9 +4,10 @@ import ( "context" "fmt" + "github.com/alibaba/pouch/pkg/log" + "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/leases" "github.com/containerd/containerd/mount" - "github.com/containerd/containerd/platforms" "github.com/containerd/containerd/snapshots" "github.com/opencontainers/image-spec/identity" ) @@ -38,20 +39,59 @@ func (c *Client) CreateSnapshot(ctx context.Context, id, ref string) error { if err != nil { return fmt.Errorf("failed to get a containerd grpc client: %v", err) } + + original_ctx := ctx ctx = leases.WithLease(ctx, wrapperCli.lease.ID) - image, err := wrapperCli.client.ImageService().Get(ctx, ref) + var ( + snName = CurrentSnapshotterName(ctx) + snSrv = wrapperCli.client.SnapshotService(snName) + ) + + image, err := wrapperCli.client.GetImage(ctx, ref) if err != nil { return err } - diffIDs, err := image.RootFS(ctx, wrapperCli.client.ContentStore(), platforms.Default()) + diffIDs, err := image.RootFS(ctx) if err != nil { return err } - parent := identity.ChainID(diffIDs).String() - _, err = wrapperCli.client.SnapshotService(CurrentSnapshotterName(ctx)).Prepare(ctx, id, parent) + + // NOTE: PouchContainer always unpacks image during pulling. But there + // maybe crash or terminated by some reason. The image have been stored + // in containerd without unpacking. And the following creating container + // request will fail on preparing snapshot because there is no such + // parent snapshotter. Based on this case, we should skip the not + // found error and try to unpack it again. + _, err = snSrv.Prepare(ctx, id, parent) + if err == nil || !errdefs.IsNotFound(err) { + return err + } + log.With(ctx).Warnf("checking unpack status for image %s on %s snapshotter...", image.Name(), snName) + + // check unpacked status + unpacked, werr := image.IsUnpacked(ctx, snName) + if werr != nil { + log.With(ctx).Warnf("failed to check unpack status for image %s on %s snapshotter: %v", image.Name(), snName, werr) + return err + } + + // if it is not unpacked, try to unpack it. + if !unpacked { + log.With(ctx).Warnf("the image %s doesn't unpack for %s snapshotter, try to unpack it...", image.Name(), snName) + // NOTE: don't use pouchd lease id here because pouchd lease id + // will hold the snapshotter forever, which means that the + // snapshotter will not removed if we remove image. + if werr = image.Unpack(original_ctx, snName); werr != nil { + log.With(ctx).Warnf("failed to unpack for image %s on %s snapshotter: %v", image.Name(), snName, werr) + return err + } + + // do it again. + _, err = snSrv.Prepare(ctx, id, parent) + } return err } From 2ff6f87327a50c5c9fde0841409ebe0a3f478509 Mon Sep 17 00:00:00 2001 From: Wei Fu Date: Thu, 31 Oct 2019 14:25:46 +0800 Subject: [PATCH 3/4] test: add test to check snapshotter holder Signed-off-by: Wei Fu --- test/cli_restart_test.go | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/test/cli_restart_test.go b/test/cli_restart_test.go index 8332c6762..2b97cd533 100644 --- a/test/cli_restart_test.go +++ b/test/cli_restart_test.go @@ -118,3 +118,40 @@ func (suite *PouchRestartSuite) TestPouchRestartMultiContainers(c *check.C) { c.Fatalf("unexpected output: %s, expected: %s\n%s", out, containernames[0], containernames[1]) } } + +// TestPouchRestartMakeSnapshotterStillExist tests snapshotter holder functionality. +// +// NOTE: PouchContainer uses containerd lease and gc key reference to hold the +// container's writable layer. The containerd lease object will hold reference +// to container's writable layer until we remove the lease object. Basically, +// we don't remove the lease object so that the container's writable layer will +// be hold until we remove container. +// +// The container metadata in containerd side also holds the gc reference to +// container's writable snapshotter by option containerd.WithSnapshot. When +// container quits, the metadata will be removed by pouch exit hook. With lease +// the RW snapshotter is still referenced by lease and not deleted by GC. So +// we restart the container and it should be success. +func (suite *PouchRestartSuite) TestPouchRestartMakeSnapshotterStillExist(c *check.C) { + cname := "TestPouchRestartMakeSnapshotterStillExist" + res := command.PouchRun("run", "--name", cname, busyboxImage, "sh", "-c", "ls -al /bin") + res.Assert(c, icmd.Success) + defer DelContainerForceMultyTime(c, cname) + + lines := res.Stdout() + c.Assert(len(strings.Split(lines, "\n")) > 3, check.Equals, true) + + // try to trigger containerd gc by image remove + // + // TODO(fuweid): slow... + command.PouchRun("pull", busyboxImage125).Assert(c, icmd.Success) + command.PouchRun("rmi", busyboxImage125).Assert(c, icmd.Success) + + // the container snapshotter should be hold by containerd lease named by pouchd.leases. + res = command.PouchRun("start", "-a", cname) + res.Assert(c, icmd.Success) + + // since no one changes bin folder, the output should be the same. + secondLines := res.Stdout() + c.Assert(lines, check.Equals, secondLines) +} From f7f39df873eff0e6a67d0120bc046376611d5a83 Mon Sep 17 00:00:00 2001 From: Wei Fu Date: Thu, 31 Oct 2019 14:57:03 +0800 Subject: [PATCH 4/4] ctrd: fix naming issue original_ctx -> originalCtx And also add testing common conf for new test case. Signed-off-by: Wei Fu --- ctrd/snapshot.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ctrd/snapshot.go b/ctrd/snapshot.go index 0db936636..c10062c76 100644 --- a/ctrd/snapshot.go +++ b/ctrd/snapshot.go @@ -40,7 +40,7 @@ func (c *Client) CreateSnapshot(ctx context.Context, id, ref string) error { return fmt.Errorf("failed to get a containerd grpc client: %v", err) } - original_ctx := ctx + originalCtx := ctx ctx = leases.WithLease(ctx, wrapperCli.lease.ID) var ( @@ -84,7 +84,7 @@ func (c *Client) CreateSnapshot(ctx context.Context, id, ref string) error { // NOTE: don't use pouchd lease id here because pouchd lease id // will hold the snapshotter forever, which means that the // snapshotter will not removed if we remove image. - if werr = image.Unpack(original_ctx, snName); werr != nil { + if werr = image.Unpack(originalCtx, snName); werr != nil { log.With(ctx).Warnf("failed to unpack for image %s on %s snapshotter: %v", image.Name(), snName, werr) return err }