Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Cgroups] Extended cgroup support #21322

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/main/java/com/google/devtools/build/lib/metrics/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/profiler",
"//src/main/java/com/google/devtools/build/lib/profiler:network_metrics_collector",
"//src/main/java/com/google/devtools/build/lib/sandbox:cgroups_info",
"//src/main/java/com/google/devtools/build/lib/sandbox/cgroups",
"//src/main/java/com/google/devtools/build/lib/skyframe:execution_finished_event",
"//src/main/java/com/google/devtools/build/lib/skyframe:sky_key_stats",
"//src/main/java/com/google/devtools/build/lib/skyframe:skyframe_stats",
Expand Down Expand Up @@ -99,6 +100,7 @@ java_library(
deps = [
"//src/main/java/com/google/devtools/build/lib/clock",
"//src/main/java/com/google/devtools/build/lib/sandbox:cgroups_info",
"//src/main/java/com/google/devtools/build/lib/sandbox/cgroups",
"//third_party:auto_value",
"//third_party:flogger",
"//third_party:guava",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import com.google.common.collect.ImmutableMap;
import com.google.devtools.build.lib.clock.Clock;
import com.google.devtools.build.lib.sandbox.CgroupsInfo;
import com.google.devtools.build.lib.sandbox.Cgroup;
import java.util.Map;

/** Collects resource usage of processes from their cgroups {@code CgroupsInfo}. */
Expand All @@ -35,11 +35,11 @@ public static CgroupsInfoCollector instance() {
return instance;
}

public ResourceSnapshot collectResourceUsage(Map<Long, CgroupsInfo> pidToCgroups, Clock clock) {
public ResourceSnapshot collectResourceUsage(Map<Long, Cgroup> pidToCgroups, Clock clock) {
ImmutableMap.Builder<Long, Integer> pidToMemoryInKb = ImmutableMap.builder();

for (Map.Entry<Long, CgroupsInfo> entry : pidToCgroups.entrySet()) {
CgroupsInfo cgroup = entry.getValue();
for (Map.Entry<Long, Cgroup> entry : pidToCgroups.entrySet()) {
Cgroup cgroup = entry.getValue();
// TODO(b/292634407): Consider how to handle the unlikely case where only some cgroups are
// invalid.
if (cgroup.exists()) {
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/com/google/devtools/build/lib/sandbox/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/exec/local",
"//src/main/java/com/google/devtools/build/lib/exec/local:options",
"//src/main/java/com/google/devtools/build/lib/profiler",
"//src/main/java/com/google/devtools/build/lib/sandbox/cgroups",
"//src/main/java/com/google/devtools/build/lib/shell",
"//src/main/java/com/google/devtools/build/lib/util:os",
"//src/main/java/com/google/devtools/build/lib/vfs",
Expand All @@ -233,6 +234,7 @@ java_library(
java_library(
name = "cgroups_info",
srcs = [
"Cgroup.java",
"CgroupsInfo.java",
"CgroupsInfoV1.java",
"CgroupsInfoV2.java",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package com.google.devtools.build.lib.sandbox;

public interface Cgroup {
int getMemoryUsageInKb();
boolean exists();
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import javax.annotation.Nullable;

/** This class manages cgroups directories for memory-limiting sandboxed processes. */
public abstract class CgroupsInfo {
public abstract class CgroupsInfo implements Cgroup {

private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public static BindMount of(Path mountPoint, Path source) {
private boolean enablePseudoterminal = false;
private String sandboxDebugPath = null;
private boolean sigintSendsSigterm = false;
private String cgroupsDir;
private Set<java.nio.file.Path> cgroupsDirs = ImmutableSet.of();

private LinuxSandboxCommandLineBuilder(Path linuxSandboxPath) {
this.linuxSandboxPath = linuxSandboxPath;
Expand Down Expand Up @@ -229,8 +229,8 @@ public LinuxSandboxCommandLineBuilder setSandboxDebugPath(String sandboxDebugPat
* this directory, its parent directory, and the cgroup directory for the Bazel process.
*/
@CanIgnoreReturnValue
public LinuxSandboxCommandLineBuilder setCgroupsDir(String cgroupsDir) {
this.cgroupsDir = cgroupsDir;
public LinuxSandboxCommandLineBuilder setCgroupsDirs(Set<java.nio.file.Path> cgroupsDirs) {
this.cgroupsDirs = cgroupsDirs;
return this;
}

Expand Down Expand Up @@ -312,8 +312,8 @@ public ImmutableList<String> buildForCommand(List<String> commandArguments) {
if (persistentProcess) {
commandLineBuilder.add("-p");
}
if (cgroupsDir != null) {
commandLineBuilder.add("-C", cgroupsDir);
for (java.nio.file.Path dir: cgroupsDirs) {
commandLineBuilder.add("-C", dir.toString());
}
commandLineBuilder.add("--");
commandLineBuilder.addAll(commandArguments);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
import com.google.devtools.build.lib.sandbox.LinuxSandboxCommandLineBuilder.BindMount;
import com.google.devtools.build.lib.sandbox.SandboxHelpers.SandboxInputs;
import com.google.devtools.build.lib.sandbox.SandboxHelpers.SandboxOutputs;
import com.google.devtools.build.lib.sandbox.cgroups.VirtualCGroup;
import com.google.devtools.build.lib.sandbox.cgroups.VirtualCGroupFactory;
import com.google.devtools.build.lib.shell.Command;
import com.google.devtools.build.lib.shell.CommandException;
import com.google.devtools.build.lib.util.OS;
Expand Down Expand Up @@ -140,6 +142,7 @@ private static boolean computeIsSupported(CommandEnvironment cmdEnv, Path linuxS
private final Reporter reporter;
private final ImmutableList<Root> packageRoots;
private String cgroupsDir;
private final VirtualCGroupFactory cgroupFactory;

/**
* Creates a sandboxed spawn runner that uses the {@code linux-sandbox} tool.
Expand All @@ -160,6 +163,15 @@ private static boolean computeIsSupported(CommandEnvironment cmdEnv, Path linuxS
Duration timeoutKillDelay,
TreeDeleter treeDeleter) {
super(cmdEnv);
SandboxOptions sandboxOptions = cmdEnv.getOptions().getOptions(SandboxOptions.class);
this.cgroupFactory =
sandboxOptions == null || !sandboxOptions.useNewCgroupImplementation ?
null :
new VirtualCGroupFactory(
"sandbox_",
VirtualCGroup.getInstance(),
getSandboxOptions().getLimits(),
/* alwaysCreate= */ false);
this.helpers = helpers;
this.fileSystem = cmdEnv.getRuntime().getFileSystem();
this.blazeDirs = cmdEnv.getDirectories();
Expand Down Expand Up @@ -322,15 +334,22 @@ protected SandboxedSpawn prepareSpawn(Spawn spawn, SpawnExecutionContext context
commandLineBuilder.setSandboxDebugPath(sandboxDebugPath.getPathString());
}

if (sandboxOptions.memoryLimitMb > 0) {
if (cgroupFactory != null) {
ImmutableMap<String, Double> spawnResourceLimits = ImmutableMap.of();
if (sandboxOptions.enforceResources.regexPattern().matcher(spawn.getMnemonic()).matches()) {
spawnResourceLimits = spawn.getLocalResources().getResources();
}
VirtualCGroup cgroup = cgroupFactory.create(context.getId(), spawnResourceLimits);
commandLineBuilder.setCgroupsDirs(cgroup.paths());
} else if (sandboxOptions.memoryLimitMb > 0) {
// We put the sandbox inside a unique subdirectory using the context's ID. This ID is
// unique per spawn run by this spawn runner.
CgroupsInfo sandboxCgroup =
CgroupsInfo.getBlazeSpawnsCgroup()
.createIndividualSpawnCgroup(
"sandbox_" + context.getId(), sandboxOptions.memoryLimitMb);
if (sandboxCgroup.exists()) {
commandLineBuilder.setCgroupsDir(sandboxCgroup.getCgroupDir().toString());
commandLineBuilder.setCgroupsDirs(ImmutableSet.of(sandboxCgroup.getCgroupDir().toPath()));
}
}

Expand Down Expand Up @@ -491,6 +510,11 @@ public void verifyPostCondition(
if (getSandboxOptions().useHermetic) {
checkForConcurrentModifications(context);
}
// We cannot leave the cgroups around and delete them only when we delete the sandboxes
// because linux has a hard limit of 65535 memory controllers.
// Ref. https://github.com/torvalds/linux/blob/58d4e450a490d5f02183f6834c12550ba26d3b47/include/linux/memcontrol.h#L69
if (cgroupFactory != null)
cgroupFactory.remove(context.getId());
}

private void checkForConcurrentModifications(SpawnExecutionContext context)
Expand Down Expand Up @@ -558,6 +582,7 @@ public void cleanupSandboxBase(Path sandboxBase, TreeDeleter treeDeleter) throws
if (cgroupsDir != null) {
new File(cgroupsDir).delete();
}
VirtualCGroup.deleteInstance();
// Delete the inaccessible files synchronously, bypassing the treeDeleter. They are only a
// couple of files that can be deleted fast, and ensuring they are gone at the end of every
// build avoids annoying permission denied errors if the user happens to run "rm -rf" on the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,21 @@
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.common.options.Converter;
import com.google.devtools.common.options.Converters.BooleanConverter;
import com.google.devtools.common.options.Converters.RegexPatternConverter;
import com.google.devtools.common.options.Converters.TriStateConverter;
import com.google.devtools.common.options.Option;
import com.google.devtools.common.options.OptionDocumentationCategory;
import com.google.devtools.common.options.OptionEffectTag;
import com.google.devtools.common.options.OptionMetadataTag;
import com.google.devtools.common.options.OptionsBase;
import com.google.devtools.common.options.OptionsParsingException;
import com.google.devtools.common.options.RegexPatternOption;
import com.google.devtools.common.options.TriState;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/** Options for sandboxed execution. */
public class SandboxOptions extends OptionsBase {
Expand Down Expand Up @@ -369,6 +373,53 @@ public ImmutableSet<Path> getInaccessiblePaths(FileSystem fs) {
+ " Requires cgroups v1 or v2 and permissions for the users to the cgroups dir.")
public int memoryLimitMb;

@Option(
name = "experimental_sandbox_limits",
Copy link
Contributor

@zhengwei143 zhengwei143 Feb 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of introducing a separate mechanism for sandboxed spawns, I wonder if it's better to unify this with the specified resource_set of the sandboxed action. Then just use --experimental_sandbox_enforce_resources_regexp to control for which actions to enforce this on.

Is there a reason to have this as a separate mechanism?

cc @wilwell.

Copy link
Contributor Author

@AlessandroPatti AlessandroPatti Feb 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a couple of reasons why I find this flag very useful:

  • You cannot override the resource set of arbitrary actions, only tests (see Consistent resource management across actions and resource types #19679) and for most actions the default resource set is underestimated
  • I found it particularly useful to pass --experimental_sandbox_enforce_resources_regexp=DO_NOT_MATCH --experimental_sandbox_limits=cpu=<n> <target> to test <target> with a different limit other then the one specified at the target level. Without this flag, I'd have to go and change the BUILD file, which makes this harder to iterate on.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You cannot override the resource set of arbitrary actions

By this I assume you mean actions outside of your control - e.g. in build rules not owned by your project / default starlark resources.

for most actions the default resource set is underestimated

While I don't think fast testing / iteration is a good reason to introduce this (whatever value specified here should be relatively stable without users having to tinker frequently), I can see a legitimate case for this in the event that resources are underestimated. Since the ResourceManager makes reservations purely off of estimated values (and kind-of makes a claim that the action will have some minimum amount of resources1), this serves as a mechanism to upper bound resources.


1 Not entirely true since Bazel assumes it can use all the resources available to the machine and doesn't actively monitor actually available resources.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add this as a separate flag that overrides --experimental_sandbox_memory_limit_mb (and document that behavior) so we don't make this a breaking change. Internally I don't believe we use this flag, but I'm not sure about external users.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

defaultValue = "null",
documentationCategory = OptionDocumentationCategory.EXECUTION_STRATEGY,
effectTags = {OptionEffectTag.EXECUTION},
converter = ResourceConverter.AssignmentConverter.class,
allowMultiple = true,
help =
"If > 0, each Linux sandbox will be limited to the given amount"
+ " for the specified resource. Requires --incompatible_use_new_cgroup_implementation"
+ " and overrides --experimental_sandbox_memory_limit_mb."
+ " Requires cgroups v1 or v2 and permissions for the users to the cgroups dir.")
AlessandroPatti marked this conversation as resolved.
Show resolved Hide resolved
public List<Map.Entry<String, Double>> limits;

public ImmutableMap<String, Double> getLimits() {
return ImmutableMap.<String, Double> builder()
.put("memory", (double) memoryLimitMb)
.putAll(limits)
.buildKeepingLast();
}

@Option(
name = "incompatible_use_new_cgroup_implementation",
defaultValue = "false",
documentationCategory = OptionDocumentationCategory.EXECUTION_STRATEGY,
effectTags = {OptionEffectTag.EXECUTION},
converter = BooleanConverter.class,
help =
"If true, use the new implementation for cgroups. The old implementation only supports"
+ " the memory controller and ignores the value of --experimental_sandbox_limits.")
public boolean useNewCgroupImplementation;

@Option(
name = "experimental_sandbox_enforce_resources_regexp",
defaultValue = "",
documentationCategory = OptionDocumentationCategory.EXECUTION_STRATEGY,
effectTags = {OptionEffectTag.EXECUTION},
converter = RegexPatternConverter.class,
help =
"If true, actions whose mnemonic matches the input regex"
+ " will have their resources request enforced as limits, overriding"
+ " the value of --experimental_sandbox_limits, if the resource type"
+ " supports it. For example a test that declares"
+ " cpu:3 and resources:memory:10, will run with at most 3 cpus and 10"
+ " megabytes of memory.")
public RegexPatternOption enforceResources;

/** Converter for the number of threads used for asynchronous tree deletion. */
public static final class AsyncTreeDeletesConverter extends ResourceConverter.IntegerConverter {
public AsyncTreeDeletesConverter() {
Expand Down
41 changes: 41 additions & 0 deletions src/main/java/com/google/devtools/build/lib/sandbox/cgroups/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
load("@rules_java//java:defs.bzl", "java_library")

package(default_applicable_licenses = ["//:license"])

filegroup(
name = "srcs",
srcs = glob(["**"]) + [
"//src/main/java/com/google/devtools/build/lib/sandbox/cgroups/controller:srcs",
],
visibility = ["//src:__subpackages__"],
)

java_library(
name = "cgroups",
srcs = [
"Hierarchy.java",
"Mount.java",
"VirtualCGroup.java",
"VirtualCGroupFactory.java",
],
deps = [
"//src/main/java/com/google/devtools/build/lib/sandbox:cgroups_info",
"//src/main/java/com/google/devtools/build/lib/actions:exec_exception",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/sandbox/cgroups/controller",
"//src/main/java/com/google/devtools/build/lib/sandbox/cgroups/controller/v1",
"//src/main/java/com/google/devtools/build/lib/sandbox/cgroups/controller/v2",
"//src/main/protobuf:failure_details_java_proto",
"//third_party:auto_value",
"//third_party:flogger",
"//third_party:guava",
],
visibility = [
"//src/main/java/com/google/devtools/build/lib/metrics:__subpackages__",
"//src/main/java/com/google/devtools/build/lib/sandbox:__subpackages__",
"//src/main/java/com/google/devtools/build/lib/worker:__subpackages__",
"//src/test/java/com/google/devtools/build/lib/metrics:__subpackages__",
"//src/test/java/com/google/devtools/build/lib/sandbox/cgroups:__subpackages__",
"//src/test/java/com/google/devtools/build/lib/worker:__subpackages__",
],
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package com.google.devtools.build.lib.sandbox.cgroups;

import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableList;
import com.google.common.io.Files;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

@AutoValue
public abstract class Hierarchy {
public abstract Integer id();
public abstract List<String> controllers();
public abstract Path path();
public boolean isV2() {
return controllers().size() == 1 && controllers().contains("") && id() == 0;
}

/**
* A regexp that matches entries in {@code /proc/self/cgroup}.
*
* The format is documented in https://man7.org/linux/man-pages/man7/cgroups.7.html
*/
private static final Pattern PROC_CGROUPS_PATTERN =
Pattern.compile("^(?<id>\\d+):(?<controllers>[^:]*):(?<file>.+)");

static Hierarchy create(Integer id, List<String> controllers, Path path) {
return new AutoValue_Hierarchy(id, controllers, path);
}

static List<Hierarchy> parse(File procCgroup) throws IOException {
ImmutableList.Builder<Hierarchy> hierarchies = ImmutableList.builder();
for (String line : Files.readLines(procCgroup, StandardCharsets.UTF_8)) {
Matcher m = PROC_CGROUPS_PATTERN.matcher(line);
if (!m.matches()) {
continue;
}

Integer id = Integer.parseInt(m.group("id"));
String path = m.group("file");
List<String> cs = ImmutableList.copyOf(m.group("controllers").split(","));
hierarchies.add(Hierarchy.create(id, cs, Paths.get(path)));
}
return hierarchies.build();
}
}
Loading
Loading