Skip to content

Commit

Permalink
fix: update the cgroups for Talos core services
Browse files Browse the repository at this point in the history
There was a bit of a mess here, which worked fine until we bumped
runc/containerd; the problem now shows up in Talos-in-Kubernetes tests.

Consistently use `runner.WithCgroupPath`, as it handles cgroup nesting
for cases when Talos runs in a container.

Assign each service its own unique cgroup.

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
  • Loading branch information
smira committed Jun 6, 2024
1 parent fe317f1 commit 80ca8ff
Show file tree
Hide file tree
Showing 7 changed files with 16 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ func CreateSystemCgroups(runtime.Sequence, any) (runtime.TaskExecutionFunc, stri
return fmt.Errorf("error initializing cgroups root path: %w", err)
}

logger.Printf("using cgroups root: %s", cgroup.Root())

groups := []struct {
name string
resources *cgroup2.Resources
Expand Down
1 change: 1 addition & 0 deletions internal/app/machined/pkg/system/services/apid.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ func (o *APID) Runner(r runtime.Runtime) (runner.Runner, error) {
runner.WithLoggingManager(r.Logging()),
runner.WithContainerdAddress(constants.SystemContainerdAddress),
runner.WithEnv(env),
runner.WithCgroupPath(constants.CgroupApid),
runner.WithOCISpecOpts(
oci.WithDroppedCapabilities(cap.Known()),
oci.WithHostNamespace(specs.NetworkNamespace),
Expand Down
1 change: 1 addition & 0 deletions internal/app/machined/pkg/system/services/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ func (e *Etcd) Runner(r runtime.Runtime) (runner.Runner, error) {
runner.WithNamespace(constants.SystemContainerdNamespace),
runner.WithContainerImage(e.imgRef),
runner.WithEnv(env),
runner.WithCgroupPath(constants.CgroupEtcd),
runner.WithOCISpecOpts(
oci.WithDroppedCapabilities(cap.Known()),
oci.WithHostNamespace(specs.NetworkNamespace),
Expand Down
2 changes: 1 addition & 1 deletion internal/app/machined/pkg/system/services/extension.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ func (svc *Extension) getOCIOptions(envVars []string, mounts []specs.Mount) []oc
ociOpts := []oci.SpecOpts{
oci.WithRootFSPath(filepath.Join(constants.ExtensionServiceRootfsPath, svc.Spec.Name)),
containerd.WithRootfsPropagation(svc.Spec.Container.Security.RootfsPropagation),
oci.WithCgroup(filepath.Join(constants.CgroupExtensions, svc.Spec.Name)),
oci.WithMounts(mounts),
oci.WithHostNamespace(specs.NetworkNamespace),
oci.WithSelinuxLabel(""),
Expand Down Expand Up @@ -216,6 +215,7 @@ func (svc *Extension) Runner(r runtime.Runtime) (runner.Runner, error) {
runner.WithContainerdAddress(constants.SystemContainerdAddress),
runner.WithEnv(environment.Get(r.Config())),
runner.WithOCISpecOpts(ociSpecOpts...),
runner.WithCgroupPath(filepath.Join(constants.CgroupExtensions, svc.Spec.Name)),
runner.WithOOMScoreAdj(-600),
),
restart.WithType(restartType),
Expand Down
2 changes: 1 addition & 1 deletion internal/app/machined/pkg/system/services/kubelet.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,9 @@ func (k *Kubelet) Runner(r runtime.Runtime) (runner.Runner, error) {
runner.WithNamespace(constants.SystemContainerdNamespace),
runner.WithContainerImage(k.imgRef),
runner.WithEnv(environment.Get(r.Config())),
runner.WithCgroupPath(constants.CgroupKubelet),
runner.WithOCISpecOpts(
containerd.WithRootfsPropagation("shared"),
oci.WithCgroup(constants.CgroupKubelet),
oci.WithMounts(mounts),
oci.WithHostNamespace(specs.NetworkNamespace),
oci.WithHostNamespace(specs.PIDNamespace),
Expand Down
1 change: 1 addition & 0 deletions internal/app/machined/pkg/system/services/trustd.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ func (t *Trustd) Runner(r runtime.Runtime) (runner.Runner, error) {
runner.WithLoggingManager(r.Logging()),
runner.WithContainerdAddress(constants.SystemContainerdAddress),
runner.WithEnv(env),
runner.WithCgroupPath(constants.CgroupTrustd),
runner.WithOCISpecOpts(
containerd.WithMemoryLimit(int64(1000000*512)),
oci.WithDroppedCapabilities(cap.Known()),
Expand Down
9 changes: 9 additions & 0 deletions pkg/machinery/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,12 @@ const (
// CgroupSystemRuntime is the cgroup name for containerd runtime processes.
CgroupSystemRuntime = CgroupSystem + "/runtime"

// CgroupApid is the cgroup name for apid runtime processes.
CgroupApid = CgroupSystem + "/apid"

// CgroupTrustd is the cgroup name for trustd runtime processes.
CgroupTrustd = CgroupSystem + "/trustd"

// CgroupUdevd is the cgroup name for udevd runtime processes.
CgroupUdevd = CgroupSystem + "/udevd"

Expand All @@ -681,6 +687,9 @@ const (
// CgroupPodRuntimeReservedMemory is the hard memory protection for the cri runtime processes.
CgroupPodRuntimeReservedMemory = 128 * 1024 * 1024

// CgroupEtcd is the cgroup name for the etcd process.
CgroupEtcd = "/podruntime/etcd"

// CgroupKubelet is the cgroup name for the kubelet process.
CgroupKubelet = "/podruntime/kubelet"

Expand Down

0 comments on commit 80ca8ff

Please sign in to comment.