Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhance: add retry helper method for Trylock #2319

Merged
merged 1 commit into from
Oct 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions ctrd/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ import (
)

var (
runtimeRoot = "/run"
runtimeRoot = "/run"
defaultTrylockTimeout = 5 * time.Second
)

type containerPack struct {
Expand All @@ -55,7 +56,7 @@ func (c *Client) ContainerStats(ctx context.Context, id string) (*containerdtype

// containerStats returns stats of the container.
func (c *Client) containerStats(ctx context.Context, id string) (*containerdtypes.Metric, error) {
if !c.lock.Trylock(id) {
if !c.lock.TrylockWithRetry(ctx, id) {
return nil, errtypes.ErrLockfailed
Copy link
Collaborator

@allencloud allencloud Oct 17, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This part would return an error of

failed: rpc error: code = Unknown desc = 
failed to stop container "c69994fce9611e75ca826c644edb8f4dbc32a2592afb66dd34e1d4a2d9a76c0e": 
failed to destroy container c69994fce9611e75ca826c644edb8f4dbc32a2592afb66dd34e1d4a2d9a76c0e:
 lock failed"

While I think some users of PouchContainer feel a little bit unclear about the error message, could we add code like

// LockFailedError is constructing a much more readable lock failed error.
func LockFailedError(containerID string) error {
	return errors.Wrapf(errtypes.ErrLockfailed, "container %s is accessed by other request and please try again", containerID)
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the error will be disappeared after this PR 😢

}
defer c.lock.Unlock(id)
Expand Down Expand Up @@ -196,7 +197,7 @@ func (c *Client) ContainerPIDs(ctx context.Context, id string) ([]int, error) {

// containerPIDs returns the all processes's ids inside the container.
func (c *Client) containerPIDs(ctx context.Context, id string) ([]int, error) {
if !c.lock.Trylock(id) {
if !c.lock.TrylockWithRetry(ctx, id) {
return nil, errtypes.ErrLockfailed
}
defer c.lock.Unlock(id)
Expand Down Expand Up @@ -255,7 +256,7 @@ func (c *Client) recoverContainer(ctx context.Context, id string, io *containeri
return fmt.Errorf("failed to get a containerd grpc client: %v", err)
}

if !c.lock.Trylock(id) {
if !c.lock.TrylockWithRetry(ctx, id) {
return errtypes.ErrLockfailed
}
defer c.lock.Unlock(id)
Expand Down Expand Up @@ -317,7 +318,7 @@ func (c *Client) destroyContainer(ctx context.Context, id string, timeout int64)

ctx = leases.WithLease(ctx, wrapperCli.lease.ID())

if !c.lock.Trylock(id) {
if !c.lock.TrylockWithRetry(ctx, id) {
return nil, errtypes.ErrLockfailed
}
defer c.lock.Unlock(id)
Expand Down Expand Up @@ -385,7 +386,7 @@ func (c *Client) PauseContainer(ctx context.Context, id string) error {

// pauseContainer pause container.
func (c *Client) pauseContainer(ctx context.Context, id string) error {
if !c.lock.Trylock(id) {
if !c.lock.TrylockWithRetry(ctx, id) {
return errtypes.ErrLockfailed
}
defer c.lock.Unlock(id)
Expand Down Expand Up @@ -416,7 +417,7 @@ func (c *Client) UnpauseContainer(ctx context.Context, id string) error {

// unpauseContainer unpauses a container.
func (c *Client) unpauseContainer(ctx context.Context, id string) error {
if !c.lock.Trylock(id) {
if !c.lock.TrylockWithRetry(ctx, id) {
return errtypes.ErrLockfailed
}
defer c.lock.Unlock(id)
Expand Down Expand Up @@ -444,7 +445,7 @@ func (c *Client) CreateContainer(ctx context.Context, container *Container, chec
id = container.ID
)

if !c.lock.Trylock(id) {
if !c.lock.TrylockWithRetry(ctx, id) {
return errtypes.ErrLockfailed
}
defer c.lock.Unlock(id)
Expand Down Expand Up @@ -596,7 +597,7 @@ func (c *Client) UpdateResources(ctx context.Context, id string, resources types

// updateResources updates the configurations of a container.
func (c *Client) updateResources(ctx context.Context, id string, resources types.Resources) error {
if !c.lock.Trylock(id) {
if !c.lock.TrylockWithRetry(ctx, id) {
return errtypes.ErrLockfailed
}
defer c.lock.Unlock(id)
Expand Down Expand Up @@ -626,7 +627,7 @@ func (c *Client) ResizeContainer(ctx context.Context, id string, opts types.Resi
// resizeContainer changes the size of the TTY of the init process running
// in the container to the given height and width.
func (c *Client) resizeContainer(ctx context.Context, id string, opts types.ResizeOptions) error {
if !c.lock.Trylock(id) {
if !c.lock.TrylockWithRetry(ctx, id) {
return errtypes.ErrLockfailed
}
defer c.lock.Unlock(id)
Expand Down
25 changes: 25 additions & 0 deletions ctrd/container_lock.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package ctrd

import (
"context"
"math/rand"
"sync"
"time"
)

// containerLock use to make sure that only one operates the container at the same time.
Expand All @@ -27,3 +30,25 @@ func (l *containerLock) Unlock(id string) {
defer l.mutex.Unlock()
delete(l.ids, id)
}

func (l *containerLock) TrylockWithRetry(ctx context.Context, id string) bool {
var retry = 32

for {
ok := l.Trylock(id)
if ok {
return true
}

// sleep random duration by retry
select {
case <-time.After(time.Millisecond * time.Duration(rand.Intn(retry))):
if retry < 2048 {
retry = retry << 1
}
continue
case <-ctx.Done():
return false
}
}
}
43 changes: 43 additions & 0 deletions ctrd/container_lock_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,54 @@
package ctrd

import (
"context"
"testing"
"time"

"github.com/stretchr/testify/assert"
)

func Test_containerLock_TrylockWithRetry(t *testing.T) {
l := &containerLock{
ids: make(map[string]struct{}),
}

// basically, if the releaseTimeout < the tryLockTimeout,
// TrylockWithTimeout will lock successfully. If not, it will fail.
runTrylockWithT := func(tryLockTimeout, releaseTimeout time.Duration) bool {
id := "c"
assert.Equal(t, l.Trylock(id), true)
defer l.Unlock(id)

var (
releaseCh = make(chan struct{})
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the usage of this variable releaseCh?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The releaseCh variable try to promise that TrylockWithRetry will execute before the sleep and unlock function in the main go routine. Otherwise, unlock is done, TrylockWithRetry will success immediately.

waitCh = make(chan bool)
res bool
)

go func() {
close(releaseCh)
ctx, cancel := context.WithTimeout(context.TODO(), tryLockTimeout)
defer cancel()
waitCh <- l.TrylockWithRetry(ctx, id)
}()

<-releaseCh
time.Sleep(releaseTimeout)
l.Unlock(id)

select {
case <-time.After(3 * time.Second):
t.Fatalf("timeout to get the Trylock result")
case res = <-waitCh:
}
return res
}

assert.Equal(t, true, runTrylockWithT(5*time.Second, 200*time.Millisecond))
assert.Equal(t, false, runTrylockWithT(200*time.Millisecond, 500*time.Millisecond))
}

func Test_containerLock_Trylock(t *testing.T) {
l := &containerLock{
ids: make(map[string]struct{}),
Expand Down