-
Notifications
You must be signed in to change notification settings - Fork 2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #12419 from hashicorp/exec-cleanup
raw_exec: make raw exec driver work with cgroups v2
- Loading branch information
Showing
36 changed files
with
687 additions
and
308 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
```release-note:improvement | ||
Add support for cgroups v2 in raw_exec driver | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
//go:build linux | ||
|
||
package cgutil | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"os" | ||
"path/filepath" | ||
"time" | ||
|
||
"github.com/hashicorp/go-hclog" | ||
"github.com/opencontainers/runc/libcontainer/cgroups" | ||
"github.com/opencontainers/runc/libcontainer/cgroups/fs" | ||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2" | ||
"github.com/opencontainers/runc/libcontainer/configs" | ||
) | ||
|
||
// freezer names the cgroup subsystem that is used to suspend and resume all
// of the processes in a group.
const freezer = "freezer"
|
||
// thawed and frozen are the two states we put a cgroup in when trying to remove it | ||
var ( | ||
thawed = &configs.Resources{Freezer: configs.Thawed} | ||
frozen = &configs.Resources{Freezer: configs.Frozen} | ||
) | ||
|
||
// GroupKiller is used for SIGKILL-ing the process tree[s] of a cgroup by leveraging | ||
// the freezer cgroup subsystem. | ||
type GroupKiller interface { | ||
KillGroup(cgroup *configs.Cgroup) error | ||
} | ||
|
||
// NewGroupKiller creates a GroupKiller with executor PID pid. | ||
func NewGroupKiller(logger hclog.Logger, pid int) GroupKiller { | ||
return &killer{ | ||
logger: logger.Named("group_killer"), | ||
pid: pid, | ||
} | ||
} | ||
|
||
type killer struct { | ||
logger hclog.Logger | ||
pid int | ||
} | ||
|
||
// KillGroup will SIGKILL the process tree present in cgroup, using the freezer | ||
// subsystem to prevent further forking, etc. | ||
func (d *killer) KillGroup(cgroup *configs.Cgroup) error { | ||
if UseV2 { | ||
return d.v2(cgroup) | ||
} | ||
return d.v1(cgroup) | ||
} | ||
|
||
func (d *killer) v1(cgroup *configs.Cgroup) error { | ||
if cgroup == nil { | ||
return errors.New("missing cgroup") | ||
} | ||
|
||
// the actual path to our tasks freezer cgroup | ||
path := cgroup.Paths[freezer] | ||
|
||
d.logger.Trace("killing processes", "cgroup_path", path, "cgroup_version", "v1", "executor_pid", d.pid) | ||
|
||
// move executor PID into the init freezer cgroup so we can kill the task | ||
// pids without killing the executor (which is the process running this code, | ||
// doing the killing) | ||
initPath, err := cgroups.GetInitCgroupPath(freezer) | ||
if err != nil { | ||
return fmt.Errorf("failed to find init cgroup: %w", err) | ||
} | ||
m := map[string]string{freezer: initPath} | ||
if err = cgroups.EnterPid(m, d.pid); err != nil { | ||
return fmt.Errorf("failed to add executor pid to init cgroup: %w", err) | ||
} | ||
|
||
// ability to freeze the cgroup | ||
freeze := func() { | ||
_ = new(fs.FreezerGroup).Set(path, frozen) | ||
} | ||
|
||
// ability to thaw the cgroup | ||
thaw := func() { | ||
_ = new(fs.FreezerGroup).Set(path, thawed) | ||
} | ||
|
||
// do the common kill logic | ||
if err = d.kill(path, freeze, thaw); err != nil { | ||
return err | ||
} | ||
|
||
// remove the cgroup from disk | ||
return cgroups.RemovePath(path) | ||
} | ||
|
||
func (d *killer) v2(cgroup *configs.Cgroup) error { | ||
if cgroup == nil { | ||
return errors.New("missing cgroup") | ||
} | ||
|
||
path := filepath.Join(CgroupRoot, cgroup.Path) | ||
|
||
existingPIDs, err := cgroups.GetPids(path) | ||
if err != nil { | ||
return fmt.Errorf("failed to determine pids in cgroup: %w", err) | ||
} | ||
|
||
d.logger.Trace("killing processes", "cgroup_path", path, "cgroup_version", "v2", "executor_pid", d.pid, "existing_pids", existingPIDs) | ||
|
||
mgr, err := fs2.NewManager(cgroup, "", rootless) | ||
if err != nil { | ||
return fmt.Errorf("failed to create v2 cgroup manager: %w", err) | ||
} | ||
|
||
// move executor PID into the root init.scope so we can kill the task pids | ||
// without killing the executor (which is the process running this code, doing | ||
// the killing) | ||
init, err := fs2.NewManager(nil, filepath.Join(CgroupRoot, "init.scope"), rootless) | ||
if err != nil { | ||
return fmt.Errorf("failed to create v2 init cgroup manager: %w", err) | ||
} | ||
if err = init.Apply(d.pid); err != nil { | ||
return fmt.Errorf("failed to move executor pid into init.scope cgroup: %w", err) | ||
} | ||
|
||
d.logger.Trace("move of executor pid into init.scope complete", "pid", d.pid) | ||
|
||
// ability to freeze the cgroup | ||
freeze := func() { | ||
_ = mgr.Freeze(configs.Frozen) | ||
} | ||
|
||
// ability to thaw the cgroup | ||
thaw := func() { | ||
_ = mgr.Freeze(configs.Thawed) | ||
} | ||
|
||
// do the common kill logic | ||
|
||
if err = d.kill(path, freeze, thaw); err != nil { | ||
return err | ||
} | ||
|
||
// remove the cgroup from disk | ||
return mgr.Destroy() | ||
} | ||
|
||
// kill is used to SIGKILL all processes in cgroup | ||
// | ||
// The order of operations is | ||
// 0. before calling this method, the executor pid has been moved outside of cgroup | ||
// 1. freeze cgroup (so processes cannot fork further) | ||
// 2. scan the cgroup to collect all pids | ||
// 3. issue SIGKILL to each pid found | ||
// 4. thaw the cgroup so processes can go die | ||
// 5. wait on each processes until it is confirmed dead | ||
func (d *killer) kill(cgroup string, freeze func(), thaw func()) error { | ||
// freeze the cgroup stopping further forking | ||
freeze() | ||
|
||
d.logger.Trace("search for pids in", "cgroup", cgroup) | ||
|
||
// find all the pids we intend to kill | ||
pids, err := cgroups.GetPids(cgroup) | ||
if err != nil { | ||
// if we fail to get pids, re-thaw before bailing so there is at least | ||
// a chance the processes can go die out of band | ||
thaw() | ||
return fmt.Errorf("failed to find pids: %w", err) | ||
} | ||
|
||
d.logger.Trace("send sigkill to frozen processes", "cgroup", cgroup, "pids", pids) | ||
|
||
var processes []*os.Process | ||
|
||
// kill the processes in cgroup | ||
for _, pid := range pids { | ||
p, findErr := os.FindProcess(pid) | ||
if findErr != nil { | ||
d.logger.Trace("failed to find process of pid to kill", "pid", pid, "error", findErr) | ||
continue | ||
} | ||
processes = append(processes, p) | ||
if killErr := p.Kill(); killErr != nil { | ||
d.logger.Trace("failed to kill process", "pid", pid, "error", killErr) | ||
continue | ||
} | ||
} | ||
|
||
// thawed the cgroup so we can wait on each process | ||
thaw() | ||
|
||
// wait on each process | ||
for _, p := range processes { | ||
// do not capture error; errors are normal here | ||
pState, _ := p.Wait() | ||
d.logger.Trace("return from wait on process", "pid", p.Pid, "state", pState) | ||
} | ||
|
||
// cgroups are not atomic, the OS takes a moment to un-mark the cgroup as in-use; | ||
// a tiny sleep here goes a long way for not creating noisy (but functionally benign) | ||
// errors about removing busy cgroup | ||
// | ||
// alternatively we could do the removal in a loop and silence the interim errors, but meh | ||
time.Sleep(50 * time.Millisecond) | ||
|
||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
package resources | ||
|
||
// A Containment will cleanup resources created by an executor. | ||
type Containment interface { | ||
// Apply enables containment on pid. | ||
Apply(pid int) error | ||
|
||
// Cleanup will purge executor resources like cgroups. | ||
Cleanup() error | ||
|
||
// GetPIDs will return the processes overseen by the Containment | ||
GetPIDs() PIDs | ||
} |
Oops, something went wrong.