Skip to content

Commit

Permalink
merge branch 'pr-2774'
Browse files Browse the repository at this point in the history
Kir Kolyshkin (1):
  libct/cg/fs/freezer: fix freezing race

LGTMs: @mrunalp @cyphar
Closes #2774
  • Loading branch information
cyphar committed Feb 1, 2021
2 parents 6c85f63 + 76ae1f5 commit cc988c1
Showing 1 changed file with 35 additions and 14 deletions.
49 changes: 35 additions & 14 deletions libcontainer/cgroups/fs/freezer.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,33 +28,54 @@ func (s *FreezerGroup) Apply(path string, d *cgroupData) error {

func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
switch cgroup.Resources.Freezer {
case configs.Frozen, configs.Thawed:
for {
// In case this loop does not exit because it doesn't get the expected
// state, let's write again this state, hoping it's going to be properly
// set this time. Otherwise, this loop could run infinitely, waiting for
// a state change that would never happen.
if err := fscommon.WriteFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
case configs.Frozen:
// As per older kernel docs (freezer-subsystem.txt before
// kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
// userspace should either retry or thaw. While current
// kernel cgroup v1 docs no longer mention a need to retry,
// the kernel (tested on v5.4, Ubuntu 20.04) can't reliably
// freeze a cgroup while new processes keep appearing in it
// (either via fork/clone or by writing new PIDs to
// cgroup.procs).
//
// The number of retries below is chosen to have a decent
// chance to succeed even in the worst case scenario (runc
// pause/unpause with parallel runc exec).
//
// Adding any amount of sleep in between retries did not
// increase the chances of successful freeze.
for i := 0; i < 1000; i++ {
if err := fscommon.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
return err
}

state, err := s.GetState(path)
state, err := fscommon.ReadFile(path, "freezer.state")
if err != nil {
return err
}
if state == cgroup.Resources.Freezer {
break
state = strings.TrimSpace(state)
switch state {
case "FREEZING":
continue
case string(configs.Frozen):
return nil
default:
// should never happen
return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state))
}

time.Sleep(1 * time.Millisecond)
}
// Despite our best efforts, it got stuck in FREEZING.
// Leaving it in this state is bad and dangerous, so
// let's (try to) thaw it back and error out.
_ = fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
return errors.New("unable to freeze")
case configs.Thawed:
return fscommon.WriteFile(path, "freezer.state", string(configs.Thawed))
case configs.Undefined:
return nil
default:
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer))
}

return nil
}

func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
Expand Down

0 comments on commit cc988c1

Please sign in to comment.