Skip to content

Commit

Permalink
Merge pull request #12419 from hashicorp/exec-cleanup
Browse files Browse the repository at this point in the history
raw_exec: make raw exec driver work with cgroups v2
  • Loading branch information
shoenig authored Apr 5, 2022
2 parents ca14fb0 + 1d2e2c0 commit 1334712
Show file tree
Hide file tree
Showing 36 changed files with 687 additions and 308 deletions.
3 changes: 3 additions & 0 deletions .changelog/12419.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
Add support for cgroups v2 in raw_exec driver
```
3 changes: 2 additions & 1 deletion .github/workflows/test-core.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ jobs:
run: |
make bootstrap
make generate-all
make test-nomad-module
sudo sed -i 's!Defaults!#Defaults!g' /etc/sudoers
sudo -E env "PATH=$PATH" make test-nomad-module
tests-pkgs:
runs-on: ubuntu-20.04
timeout-minutes: 30
Expand Down
1 change: 1 addition & 0 deletions api/fs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ func TestFS_Logs(t *testing.T) {
testutil.Parallel(t)
require := require.New(t)
rpcPort := 0

c, s := makeClient(t, nil, func(c *testutil.TestServerConfig) {
rpcPort = c.Ports.RPC
c.Client = &testutil.ClientConfig{
Expand Down
1 change: 1 addition & 0 deletions api/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,7 @@ type Node struct {
Links map[string]string
Meta map[string]string
NodeClass string
CgroupParent string
Drain bool
DrainStrategy *DrainStrategy
SchedulingEligibility string
Expand Down
1 change: 0 additions & 1 deletion client/allocdir/fs_unix.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
//go:build darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris
// +build darwin dragonfly freebsd linux netbsd openbsd solaris

package allocdir

Expand Down
1 change: 1 addition & 0 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -1432,6 +1432,7 @@ func (c *Client) setupNode() error {
if node.Name == "" {
node.Name, _ = os.Hostname()
}
node.CgroupParent = c.config.CgroupParent
if node.HostVolumes == nil {
if l := len(c.config.HostVolumes); l != 0 {
node.HostVolumes = make(map[string]*structs.ClientHostVolumeConfig, l)
Expand Down
1 change: 1 addition & 0 deletions client/fs_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ func TestFS_List_ACL(t *testing.T) {

func TestFS_Stream_NoAlloc(t *testing.T) {
ci.Parallel(t)
ci.SkipSlow(t, "flaky on GHA; #12358")
require := require.New(t)

// Start a client
Expand Down
8 changes: 4 additions & 4 deletions client/lib/cgutil/cgutil_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,13 @@ func CgroupScope(allocID, task string) string {
return fmt.Sprintf("%s.%s.scope", allocID, task)
}

// ConfigureBasicCgroups will initialize cgroups for v1.
// ConfigureBasicCgroups will initialize a cgroup and modify config to contain
// a reference to its path.
//
// Not useful in cgroups.v2
// v1: creates a random "freezer" cgroup which can later be used for cleanup of processes.
// v2: does nothing.
func ConfigureBasicCgroups(config *lcc.Config) error {
if UseV2 {
// In v2 the default behavior is to create inherited interface files for
// all mounted subsystems automatically.
return nil
}

Expand Down
210 changes: 210 additions & 0 deletions client/lib/cgutil/group_killer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
//go:build linux

package cgutil

import (
"errors"
"fmt"
"os"
"path/filepath"
"time"

"github.com/hashicorp/go-hclog"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
"github.com/opencontainers/runc/libcontainer/configs"
)

// freezer is the name of the cgroup subsystem used for stopping / starting
// a group of processes
const freezer = "freezer"

// thawed and frozen are the two states we put a cgroup in when trying to remove it
var (
thawed = &configs.Resources{Freezer: configs.Thawed}
frozen = &configs.Resources{Freezer: configs.Frozen}
)

// GroupKiller is used for SIGKILL-ing the process tree[s] of a cgroup by leveraging
// the freezer cgroup subsystem.
type GroupKiller interface {
KillGroup(cgroup *configs.Cgroup) error
}

// NewGroupKiller creates a GroupKiller with executor PID pid.
func NewGroupKiller(logger hclog.Logger, pid int) GroupKiller {
return &killer{
logger: logger.Named("group_killer"),
pid: pid,
}
}

type killer struct {
logger hclog.Logger
pid int
}

// KillGroup will SIGKILL the process tree present in cgroup, using the freezer
// subsystem to prevent further forking, etc.
func (d *killer) KillGroup(cgroup *configs.Cgroup) error {
if UseV2 {
return d.v2(cgroup)
}
return d.v1(cgroup)
}

func (d *killer) v1(cgroup *configs.Cgroup) error {
if cgroup == nil {
return errors.New("missing cgroup")
}

// the actual path to our tasks freezer cgroup
path := cgroup.Paths[freezer]

d.logger.Trace("killing processes", "cgroup_path", path, "cgroup_version", "v1", "executor_pid", d.pid)

// move executor PID into the init freezer cgroup so we can kill the task
// pids without killing the executor (which is the process running this code,
// doing the killing)
initPath, err := cgroups.GetInitCgroupPath(freezer)
if err != nil {
return fmt.Errorf("failed to find init cgroup: %w", err)
}
m := map[string]string{freezer: initPath}
if err = cgroups.EnterPid(m, d.pid); err != nil {
return fmt.Errorf("failed to add executor pid to init cgroup: %w", err)
}

// ability to freeze the cgroup
freeze := func() {
_ = new(fs.FreezerGroup).Set(path, frozen)
}

// ability to thaw the cgroup
thaw := func() {
_ = new(fs.FreezerGroup).Set(path, thawed)
}

// do the common kill logic
if err = d.kill(path, freeze, thaw); err != nil {
return err
}

// remove the cgroup from disk
return cgroups.RemovePath(path)
}

func (d *killer) v2(cgroup *configs.Cgroup) error {
if cgroup == nil {
return errors.New("missing cgroup")
}

path := filepath.Join(CgroupRoot, cgroup.Path)

existingPIDs, err := cgroups.GetPids(path)
if err != nil {
return fmt.Errorf("failed to determine pids in cgroup: %w", err)
}

d.logger.Trace("killing processes", "cgroup_path", path, "cgroup_version", "v2", "executor_pid", d.pid, "existing_pids", existingPIDs)

mgr, err := fs2.NewManager(cgroup, "", rootless)
if err != nil {
return fmt.Errorf("failed to create v2 cgroup manager: %w", err)
}

// move executor PID into the root init.scope so we can kill the task pids
// without killing the executor (which is the process running this code, doing
// the killing)
init, err := fs2.NewManager(nil, filepath.Join(CgroupRoot, "init.scope"), rootless)
if err != nil {
return fmt.Errorf("failed to create v2 init cgroup manager: %w", err)
}
if err = init.Apply(d.pid); err != nil {
return fmt.Errorf("failed to move executor pid into init.scope cgroup: %w", err)
}

d.logger.Trace("move of executor pid into init.scope complete", "pid", d.pid)

// ability to freeze the cgroup
freeze := func() {
_ = mgr.Freeze(configs.Frozen)
}

// ability to thaw the cgroup
thaw := func() {
_ = mgr.Freeze(configs.Thawed)
}

// do the common kill logic

if err = d.kill(path, freeze, thaw); err != nil {
return err
}

// remove the cgroup from disk
return mgr.Destroy()
}

// kill is used to SIGKILL all processes in cgroup
//
// The order of operations is
// 0. before calling this method, the executor pid has been moved outside of cgroup
// 1. freeze cgroup (so processes cannot fork further)
// 2. scan the cgroup to collect all pids
// 3. issue SIGKILL to each pid found
// 4. thaw the cgroup so processes can go die
// 5. wait on each processes until it is confirmed dead
func (d *killer) kill(cgroup string, freeze func(), thaw func()) error {
// freeze the cgroup stopping further forking
freeze()

d.logger.Trace("search for pids in", "cgroup", cgroup)

// find all the pids we intend to kill
pids, err := cgroups.GetPids(cgroup)
if err != nil {
// if we fail to get pids, re-thaw before bailing so there is at least
// a chance the processes can go die out of band
thaw()
return fmt.Errorf("failed to find pids: %w", err)
}

d.logger.Trace("send sigkill to frozen processes", "cgroup", cgroup, "pids", pids)

var processes []*os.Process

// kill the processes in cgroup
for _, pid := range pids {
p, findErr := os.FindProcess(pid)
if findErr != nil {
d.logger.Trace("failed to find process of pid to kill", "pid", pid, "error", findErr)
continue
}
processes = append(processes, p)
if killErr := p.Kill(); killErr != nil {
d.logger.Trace("failed to kill process", "pid", pid, "error", killErr)
continue
}
}

// thawed the cgroup so we can wait on each process
thaw()

// wait on each process
for _, p := range processes {
// do not capture error; errors are normal here
pState, _ := p.Wait()
d.logger.Trace("return from wait on process", "pid", p.Pid, "state", pState)
}

// cgroups are not atomic, the OS takes a moment to un-mark the cgroup as in-use;
// a tiny sleep here goes a long way for not creating noisy (but functionally benign)
// errors about removing busy cgroup
//
// alternatively we could do the removal in a loop and silence the interim errors, but meh
time.Sleep(50 * time.Millisecond)

return nil
}
13 changes: 13 additions & 0 deletions client/lib/resources/containment.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package resources

// A Containment will cleanup resources created by an executor.
type Containment interface {
// Apply enables containment on pid.
Apply(pid int) error

// Cleanup will purge executor resources like cgroups.
Cleanup() error

// GetPIDs will return the processes overseen by the Containment
GetPIDs() PIDs
}
Loading

0 comments on commit 1334712

Please sign in to comment.