Skip to content

Commit

Permalink
fix: lock the whole markStoppendAndRelease to be automic
Browse files Browse the repository at this point in the history
Signed-off-by: Allen Sun <allensun.shl@alibaba-inc.com>
  • Loading branch information
allencloud committed Jun 13, 2018
1 parent a0ef39d commit ef5437c
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 22 deletions.
56 changes: 35 additions & 21 deletions daemon/mgr/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,7 @@ func (mgr *ContainerManager) Restart(ctx context.Context, name string, timeout i

if c.IsRunningOrPaused() {
// stop container if it is running or paused.
logrus.Infof("stop container(id: %s, name: %s) when restarting", c.ID, c.Name)
if err := mgr.stop(ctx, c, timeout); err != nil {
ex := fmt.Errorf("failed to stop container %s when restarting: %v", c.ID, err)
logrus.Errorf(ex.Error())
Expand Down Expand Up @@ -895,6 +896,8 @@ func (mgr *ContainerManager) Remove(ctx context.Context, name string, options *t
}
}

c.SetStatusRemoving()

if err := mgr.detachVolumes(ctx, c, options.Volumes); err != nil {
logrus.Errorf("failed to detach volume: %v", err)
}
Expand All @@ -905,9 +908,18 @@ func (mgr *ContainerManager) Remove(ctx context.Context, name string, options *t
c.Unlock()

// remove meta data
logrus.Infof("start to remove meta data of container %s", c.ID)
if err := mgr.Store.Remove(c.Key()); err != nil {
logrus.Errorf("failed to remove container %s from meta store: %v", c.ID, err)
}
// After removing container meta data from mgr.Store, nothing about container should exist
// Then assign the container object in memory to be nil to avoid other issues.

logrus.Infof("testing if we can get container %s after removing from meta store", c.ID)
if c, err := mgr.container(name); err != nil || c == nil {
logrus.Errorf("no this container")
}
logrus.Infof("bug ---------- container %s is still there")

// remove container cache
mgr.cache.Remove(c.ID)
Expand Down Expand Up @@ -1659,17 +1671,35 @@ func attachConfigToOptions(attach *AttachConfig) []func(*containerio.Option) {

func (mgr *ContainerManager) markStoppedAndRelease(c *Container, m *ctrd.Message) error {
var (
code int64 // container exit code used for container state setting
errMsg string // container exit error message used for container state setting
exitCode int64 // container exit code used for container state setting
errMsg string // container exit error message used for container state setting
)
if m != nil {
code = int64(m.ExitCode())
exitCode = int64(m.ExitCode())
if err := m.RawError(); err != nil {
errMsg = err.Error()
}
}

c.SetStatusStopped(code, errMsg)
c.Lock()
defer c.Unlock()

defer func() {
logrus.Infof("write to meta store when markStoppedAndRelease")
if c.IsRemoving() {
return
}
if err := c.Write(mgr.Store); err != nil {
logrus.Errorf("failed to update meta: %v", err)
}
logrus.Infof("finish to write meta store to disk")
}()

c.State.Status = types.StatusStopped
c.State.FinishedAt = time.Now().UTC().Format(utils.TimeLayout)
c.State.Pid = -1
c.State.ExitCode = exitCode
c.State.Error = errMsg

// unset Snapshot MergedDir. Stop a container will
// delete the containerd container, the merged dir
Expand All @@ -1689,26 +1719,18 @@ func (mgr *ContainerManager) markStoppedAndRelease(c *Container, m *ctrd.Message
return nil
}

// Remove io and network config may occur error, so we should update
// container's status on disk as soon as possible.
if err := c.Write(mgr.Store); err != nil {
logrus.Errorf("failed to update meta: %v", err)
return err
}

// release resource
if io := mgr.IOs.Get(c.ID); io != nil {
io.Close()
mgr.IOs.Remove(c.ID)
}

// No network binded, just return
c.Lock()
if c.NetworkSettings == nil {
c.Unlock()
return nil
}

// when container exits, pouchd should automatically remove container network endpoints.
for name, epConfig := range c.NetworkSettings.Networks {
endpoint := mgr.buildContainerEndpoint(c)
endpoint.Name = name
Expand All @@ -1718,18 +1740,10 @@ func (mgr *ContainerManager) markStoppedAndRelease(c *Container, m *ctrd.Message
// not found"" as an error type
if !strings.Contains(err.Error(), "not found") {
logrus.Errorf("failed to remove endpoint: %v", err)
c.Unlock()
return err
}
}
}
c.Unlock()

// update meta
if err := c.Write(mgr.Store); err != nil {
logrus.Errorf("failed to update meta of container %s: %v", c.ID, err)
return err
}

return nil
}
Expand Down
14 changes: 14 additions & 0 deletions daemon/mgr/container_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ func (c *Container) IsRestarting() bool {
return c.State.Status == types.StatusRestarting
}

// IsRemoving returns container is removing or not.
func (c *Container) IsRemoving() bool {
c.Lock()
defer c.Unlock()
return c.State.Status == types.StatusRemoving
}

// ExitCode returns container's ExitCode.
func (c *Container) ExitCode() int64 {
c.Lock()
Expand Down Expand Up @@ -123,3 +130,10 @@ func (c *Container) SetStatusUnpaused() {
defer c.Unlock()
c.State.Status = types.StatusRunning
}

// SetStatusRemoving sets a container to be status removing.
func (c *Container) SetStatusRemoving() {
c.Lock()
defer c.Unlock()
c.State.Status = types.StatusRemoving
}
7 changes: 6 additions & 1 deletion daemon/mgr/container_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ type ContainerListOption struct {
type Container struct {
sync.Mutex

ContainerMeta
}

// ContainerMeta is a new meta
type ContainerMeta struct {
// app armor profile
AppArmorProfile string `json:"AppArmorProfile,omitempty"`

Expand Down Expand Up @@ -191,7 +196,7 @@ func (c *Container) Key() string {

// Write writes container's meta data into meta store.
func (c *Container) Write(store *meta.Store) error {
return store.Put(c)
return store.Put(c.ContainerMeta)
}

// StopTimeout returns the timeout (in seconds) used to stop the container.
Expand Down

0 comments on commit ef5437c

Please sign in to comment.