From 5fe470102c55e0d256efbbc15b4a124abe7a1a6a Mon Sep 17 00:00:00 2001 From: Wang Xu Date: Sun, 9 Apr 2017 15:13:16 +0800 Subject: [PATCH 1/4] use errcode for kinds of errors Signed-off-by: Wang Xu --- errors/errors.go | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 errors/errors.go diff --git a/errors/errors.go b/errors/errors.go new file mode 100644 index 00000000..fad4ed46 --- /dev/null +++ b/errors/errors.go @@ -0,0 +1,29 @@ +package errors + +import ( + "net/http" + + "github.com/docker/distribution/registry/api/errcode" +) + +const errGroup = "hyperd" + +var ( + ErrorCodeCommon = errcode.Register(errGroup, errcode.ErrorDescriptor{ + Value: "COMMONERROR", + Message: "%v", + HTTPStatusCode: http.StatusInternalServerError, + }) + + ErrPodNotAlive = errcode.Register(errGroup, errcode.ErrorDescriptor{ + Value: "HYPER_POD_NOT_ALIVE", + Message: "cannot complete the operation, because the pod %s is not alive", + HTTPStatusCode: http.StatusPreconditionFailed, + }) + + ErrPodNotRunning = errcode.Register(errGroup, errcode.ErrorDescriptor{ + Value: "HYPER_POD_NOT_RUNNING", + Message: "cannot complete the operation, because the pod %s is not running", + HTTPStatusCode: http.StatusPreconditionFailed, + }) +) From 054ccafde70ca9f423909289886482fe73b1895d Mon Sep 17 00:00:00 2001 From: Wang Xu Date: Tue, 11 Apr 2017 17:30:19 +0800 Subject: [PATCH 2/4] legacy pod load does not need allocate IPs maybe need get current IPs for save? Signed-off-by: Wang Xu --- daemon/pod/migration.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/daemon/pod/migration.go b/daemon/pod/migration.go index 5656f52e..4b0e0a83 100644 --- a/daemon/pod/migration.go +++ b/daemon/pod/migration.go @@ -198,9 +198,6 @@ func persistLagecyPod(factory *PodFactory, spec *apitypes.UserPod) error { if err = p.initResources(spec, false); err != nil { return err } - if err = p.prepareResources(); err != nil { - return err - } if err = p.savePod(); err != nil { return err From 50b0452516eb8715e11d3ee9420b820b75901261 Mon Sep 17 00:00:00 2001 From: Wang Xu Date: Wed, 12 Apr 2017 17:06:24 +0800 Subject: [PATCH 3/4] do not treat starting a running container as failure Signed-off-by: Wang Xu --- daemon/pod/container.go | 12 ++++++++++-- errors/errors.go | 6 ++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/daemon/pod/container.go b/daemon/pod/container.go index 9605b667..f72165d3 100644 --- a/daemon/pod/container.go +++ b/daemon/pod/container.go @@ -20,6 +20,7 @@ import ( "github.com/docker/engine-api/types/strslice" "github.com/hyperhq/hypercontainer-utils/hlog" + "github.com/hyperhq/hyperd/errors" apitypes "github.com/hyperhq/hyperd/types" "github.com/hyperhq/hyperd/utils" runv "github.com/hyperhq/runv/api" @@ -233,6 +234,11 @@ func (c *Container) Add() error { func (c *Container) start() error { if err := c.status.Start(); err != nil { + if err == errors.ErrContainerAlreadyRunning { + err = nil + c.Log(INFO, "container in running state, do not need start") + return nil + } c.Log(ERROR, err) return err } @@ -1196,8 +1202,10 @@ func (cs *ContainerStatus) Start() error { cs.Lock() defer cs.Unlock() - if cs.State != S_CONTAINER_CREATED { - return fmt.Errorf("only CREATING container could be set to creatd, current: %d", cs.State) + if cs.State == S_CONTAINER_RUNNING { + return errors.ErrContainerAlreadyRunning + } else if cs.State != S_CONTAINER_CREATED { + return fmt.Errorf("only CREATED container could be set to running, current: %d", cs.State) } cs.Killed = false diff --git a/errors/errors.go b/errors/errors.go index fad4ed46..2780c733 100644 --- a/errors/errors.go +++ b/errors/errors.go @@ -26,4 +26,10 @@ var ( Message: "cannot complete the operation, because the pod %s is not running", HTTPStatusCode: http.StatusPreconditionFailed, }) + + ErrContainerAlreadyRunning = errcode.Register(errGroup, errcode.ErrorDescriptor{ + Value: "HYPER_CONTAINER_RUNNING", + Message: "container %s is in running state", + HTTPStatusCode: http.StatusPreconditionFailed, + }) ) From 6e16dcaa2af29c78d7b3101818517aa2285cfc1c Mon Sep 17 00:00:00 2001 From: Wang Xu Date: Wed, 12 Apr 2017 17:06:39 +0800 Subject: [PATCH 4/4] protected the pod provision procedures if sandbox failed, the cleanup and pod provision may race Signed-off-by: Wang Xu --- daemon/pod/decommission.go | 6 ++-- daemon/pod/pod.go | 21 ++++++++---- daemon/pod/provision.go | 67 ++++++++++++++++++++++++++++++-------- 3 files changed, 72 insertions(+), 22 deletions(-) diff --git a/daemon/pod/decommission.go b/daemon/pod/decommission.go index 4f6efcdc..c3464bc3 100644 --- a/daemon/pod/decommission.go +++ b/daemon/pod/decommission.go @@ -536,14 +536,14 @@ func (p *XPod) cleanup() { p.resourceLock.Lock() defer p.resourceLock.Unlock() - p.statusLock.RLock() + p.statusLock.Lock() if p.status == S_POD_STOPPED || p.status == S_POD_NONE { - p.statusLock.RUnlock() + p.statusLock.Unlock() return } else { p.status = S_POD_STOPPING } - p.statusLock.RUnlock() + p.statusLock.Unlock() err := p.decommissionResources() if err != nil { diff --git a/daemon/pod/pod.go b/daemon/pod/pod.go index 21f79952..d676edfa 100644 --- a/daemon/pod/pod.go +++ b/daemon/pod/pod.go @@ -74,6 +74,7 @@ type XPod struct { // got a value from this chan, it should put an element to it again, in case other procedure may wait // on it too. stoppedChan chan bool + initCond *sync.Cond } // The Log infrastructure, to add pod name as prefix of the log message. @@ -110,29 +111,37 @@ func (p *XPod) SandboxName() string { } func (p *XPod) IsNone() bool { - p.statusLock.Lock() + p.statusLock.RLock() isNone := p.status == S_POD_NONE - p.statusLock.Unlock() + p.statusLock.RUnlock() return isNone } func (p *XPod) IsRunning() bool { - p.statusLock.Lock() + p.statusLock.RLock() running := p.status == S_POD_RUNNING - p.statusLock.Unlock() + p.statusLock.RUnlock() return running } func (p *XPod) IsAlive() bool { - p.statusLock.Lock() + p.statusLock.RLock() alive := (p.status == S_POD_RUNNING) || (p.status == S_POD_STARTING) - p.statusLock.Unlock() + p.statusLock.RUnlock() return alive } +func (p *XPod) IsStopped() bool { + p.statusLock.RLock() + stopped := p.status == S_POD_STOPPED + p.statusLock.RUnlock() + + return stopped +} + func (p *XPod) IsContainerRunning(cid string) bool { if !p.IsRunning() { return false diff --git a/daemon/pod/provision.go b/daemon/pod/provision.go index 76939ad5..8d28d0a3 100644 --- a/daemon/pod/provision.go +++ b/daemon/pod/provision.go @@ -7,6 +7,7 @@ import ( "time" "github.com/hyperhq/hypercontainer-utils/hlog" + "github.com/hyperhq/hyperd/errors" apitypes "github.com/hyperhq/hyperd/types" "github.com/hyperhq/hyperd/utils" runv "github.com/hyperhq/runv/api" @@ -84,7 +85,7 @@ func newXPod(factory *PodFactory, spec *apitypes.UserPod) (*XPod, error) { } factory.hosts = HostsCreator(spec.Id) factory.logCreator = initLogCreator(factory, spec) - return &XPod{ + p := &XPod{ name: spec.Id, logPrefix: fmt.Sprintf("Pod[%s] ", spec.Id), globalSpec: spec.CloneGlobalPart(), @@ -98,7 +99,9 @@ func newXPod(factory *PodFactory, spec *apitypes.UserPod) (*XPod, error) { statusLock: &sync.RWMutex{}, stoppedChan: make(chan bool, 1), factory: factory, - }, nil + } + p.initCond = sync.NewCond(p.statusLock.RLocker()) + return p, nil } func (p *XPod) ContainerCreate(c *apitypes.UserContainer) (string, error) { @@ -211,7 +214,7 @@ func (p *XPod) ContainerStart(cid string) error { // Start() means start a STOPPED pod. func (p *XPod) Start() error { - if p.status == S_POD_STOPPED { + if p.IsStopped() { if err := p.createSandbox(p.globalSpec); err != nil { p.Log(ERROR, "failed to create sandbox for the stopped pod: %v", err) return err @@ -226,13 +229,12 @@ func (p *XPod) Start() error { } } - if p.status == S_POD_RUNNING { - if err := p.startAll(); err != nil { - return err - } - } else { - err := fmt.Errorf("not in proper status and could not be started: %v", p.status) - p.Log(ERROR, err) + err := p.waitPodRun("start pod") + if err != nil { + p.Log(ERROR, "wait running failed, cannot start pod") + return err + } + if err := p.startAll(); err != nil { return err } @@ -299,20 +301,20 @@ func (p *XPod) waitVMInit() { } r := p.sandbox.WaitInit() p.Log(INFO, "sandbox init result: %#v", r) + p.statusLock.Lock() if r.IsSuccess() { - p.statusLock.Lock() if p.status == S_POD_STARTING { p.status = S_POD_RUNNING } - p.statusLock.Unlock() } else { p.statusLock.Lock() if p.sandbox != nil { go p.sandbox.Shutdown() } p.status = S_POD_STOPPING - p.statusLock.Unlock() } + p.initCond.Broadcast() + p.statusLock.Unlock() } func (p *XPod) reserveNames(containers []*apitypes.UserContainer) error { @@ -391,6 +393,15 @@ func (p *XPod) prepareResources() error { var ( err error ) + + p.resourceLock.Lock() + defer p.resourceLock.Unlock() + + if !p.IsAlive() { + p.Log(ERROR, "pod is not alive, can not prepare resources") + return errors.ErrPodNotAlive.WithArgs(p.Id()) + } + //generate /etc/hosts p.factory.hosts.Do() @@ -414,6 +425,14 @@ func (p *XPod) prepareResources() error { // addResourcesToSandbox() add resources to sandbox parallelly, it issues // runV API parallelly to send the NIC, Vols, and Containers to sandbox func (p *XPod) addResourcesToSandbox() error { + p.resourceLock.Lock() + defer p.resourceLock.Unlock() + + if !p.IsAlive() { + p.Log(ERROR, "pod is not alive, can not add resources to sandbox") + return errors.ErrPodNotAlive.WithArgs(p.Id()) + } + p.Log(INFO, "adding resource to sandbox") future := utils.NewFutureSet() @@ -471,3 +490,25 @@ func (p *XPod) sandboxShareDir() string { } return filepath.Join(hypervisor.BaseDir, p.sandbox.Id, hypervisor.ShareDirTag) } + +func (p *XPod) waitPodRun(activity string) error { + p.statusLock.RLock() + for { + if p.status == S_POD_RUNNING || p.status == S_POD_PAUSED { + p.statusLock.RUnlock() + p.Log(DEBUG, "pod is running, proceed %s", activity) + return nil + } + if p.status != S_POD_STARTING { + p.statusLock.RUnlock() + // only starting could transit to running, if not starting, that's mean failed + p.Log(ERROR, "pod is not running, cannot %s", activity) + return errors.ErrPodNotRunning.WithArgs(p.Id()) + } + p.Log(TRACE, "wait for pod running") + p.initCond.Wait() + } + // should never reach here + p.statusLock.RUnlock() + return errors.ErrorCodeCommon.WithArgs("reach unreachable code...") +}