Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generalize memory and cpu tracking #19839

Closed
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 59 additions & 106 deletions src/main/java/com/google/devtools/build/lib/actions/ResourceManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,13 @@ public static ResourceManager instance() {
/** Returns prediction of RAM in Mb used by registered actions. */
@Override
public double getUsedMemoryInMb() {
return usedRam;
return usedResources.get("memory");
}

/** Returns prediction of CPUs used by registered actions. */
@Override
public double getUsedCPU() {
return usedCpu;
return usedResources.get("cpu");
}

// Allocated resources are allowed to go "negative", but at least
Expand All @@ -154,8 +154,7 @@ public double getUsedCPU() {
// Please note that this value is purely empirical - we assume that generally
// requested resources are somewhat pessimistic and thread would end up
// using less than requested amount.
private static final double MIN_NECESSARY_CPU_RATIO = 0.6;
private static final double MIN_NECESSARY_RAM_RATIO = 1.0;
private static final Map<String, Double> MIN_NECESSARY_RATIO = Map.of("cpu", 0.6);

// Lists of blocked threads. Associated CountDownLatch object will always
// be initialized to 1 during creation in the acquire() method.
Expand All @@ -179,18 +178,9 @@ public double getUsedCPU() {
// LocalHostCapacity.getLocalHostCapacity() as an argument.
@VisibleForTesting public ResourceSet availableResources = null;

// Used amount of CPU capacity (where 1.0 corresponds to the one fully
// occupied CPU core. Corresponds to the CPU resource definition in the
// ResourceSet class.
private double usedCpu;

// Used amount of RAM capacity in MB. Corresponds to the RAM resource
// definition in the ResourceSet class.
private double usedRam;

// Used amount of extra resources. Corresponds to the extra resource
// Used amount of resources. Corresponds to the resource
// definition in the ResourceSet class.
private Map<String, Float> usedExtraResources;
private Map<String, Double> usedResources;

// Used local test count. Corresponds to the local test count definition in the ResourceSet class.
private int usedLocalTestCount;
Expand All @@ -206,9 +196,7 @@ public static ResourceManager instanceForTestingOnly() {
* <p>Note - it does not reset available resources. Use separate call to setAvailableResources().
*/
public synchronized void resetResourceUsage() {
usedCpu = 0;
usedRam = 0;
usedExtraResources = new HashMap<>();
usedResources = new HashMap<>();
usedLocalTestCount = 0;
for (Pair<ResourceSet, LatchWithWorker> request : localRequests) {
request.second.latch.countDown();
Expand Down Expand Up @@ -298,20 +286,14 @@ public ResourceHandle acquireResources(
@Nullable
private Worker incrementResources(ResourceSet resources)
throws IOException, InterruptedException {
usedCpu += resources.getCpuUsage();
usedRam += resources.getMemoryMb();

resources
.getExtraResourceUsage()
.entrySet()
.getResources()
.forEach(
resource -> {
String key = (String) resource.getKey();
float value = resource.getValue();
if (usedExtraResources.containsKey(key)) {
value += (float) usedExtraResources.get(key);
(key, value) -> {
if (usedResources.containsKey(key)) {
value += usedResources.get(key);
}
usedExtraResources.put(key, value);
usedResources.put(key, value);
});

usedLocalTestCount += resources.getLocalTestCount();
Expand All @@ -324,9 +306,7 @@ private Worker incrementResources(ResourceSet resources)

/** Return true if any resources have been claimed through this manager. */
public synchronized boolean inUse() {
return usedCpu != 0.0
|| usedRam != 0.0
|| !usedExtraResources.isEmpty()
return !usedResources.isEmpty()
|| usedLocalTestCount != 0
|| !localRequests.isEmpty()
|| !dynamicWorkerRequests.isEmpty()
Expand Down Expand Up @@ -428,32 +408,23 @@ private boolean release(ResourceSet resources, @Nullable Worker worker)
}

private synchronized void releaseResourcesOnly(ResourceSet resources) {
usedCpu -= resources.getCpuUsage();
usedRam -= resources.getMemoryMb();

usedLocalTestCount -= resources.getLocalTestCount();

// TODO(bazel-team): (2010) rounding error can accumulate and value below can end up being
// e.g. 1E-15. So if it is small enough, we set it to 0. But maybe there is a better solution.
double epsilon = 0.0001;
if (usedCpu < epsilon) {
usedCpu = 0;
}
if (usedRam < epsilon) {
usedRam = 0;
}

Set<String> toRemove = new HashSet<>();
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
String key = (String) resource.getKey();
float value = (float) usedExtraResources.get(key) - resource.getValue();
usedExtraResources.put(key, value);
for (Map.Entry<String, Double> resource : resources.getResources().entrySet()) {
String key = resource.getKey();
double value = usedResources.getOrDefault(key, 0.0) - resource.getValue();
usedResources.put(key, value);
if (value < epsilon) {
toRemove.add(key);
}
}
for (String key : toRemove) {
usedExtraResources.remove(key);
usedResources.remove(key);
}
}

Expand Down Expand Up @@ -494,29 +465,27 @@ private synchronized void processWaitingThreads(Deque<Pair<ResourceSet, LatchWit
}

/** Throws an exception if requested extra resource isn't being tracked */
private void assertExtraResourcesTracked(ResourceSet resources) throws NoSuchElementException {
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
String key = (String) resource.getKey();
if (!availableResources.getExtraResourceUsage().containsKey(key)) {
private void assertResourcesTracked(ResourceSet resources) throws NoSuchElementException {
for (Map.Entry<String, Double> resource : resources.getResources().entrySet()) {
String key = resource.getKey();
if (!availableResources.getResources().containsKey(key)) {
throw new NoSuchElementException(
"Resource " + key + " is not tracked in this resource set.");
}
}
}

/** Return true iff all requested extra resources are considered to be available. */
private boolean areExtraResourcesAvailable(ResourceSet resources) throws NoSuchElementException {
for (Map.Entry<String, Float> resource : resources.getExtraResourceUsage().entrySet()) {
String key = (String) resource.getKey();
float used = (float) usedExtraResources.getOrDefault(key, 0f);
float requested = resource.getValue();
float available = availableResources.getExtraResourceUsage().get(key);
float epsilon = 0.0001f; // Account for possible rounding errors.
if (requested != 0.0 && used != 0.0 && requested + used > available + epsilon) {
return false;
}
}
return true;
private static <T extends Number> boolean isAvailable(T available, T used, T requested) {
// Resources are considered available if any one of the conditions below is true:
// 1) If resource is not requested at all, it is available.
// 2) If resource is not used at the moment, it is considered to be
// available regardless of how much is requested. This is necessary to
// ensure that at any given time, at least one thread is able to acquire
// resources even if it requests more than available.
// 3) If used resource amount is less than total available resource amount.
return requested.doubleValue() == 0
|| used.doubleValue() == 0
|| used.doubleValue() + requested.doubleValue() <= available.doubleValue();
}

// Method will return true if all requested resources are considered to be available.
Expand All @@ -527,59 +496,43 @@ boolean areResourcesAvailable(ResourceSet resources) throws NoSuchElementExcepti
// by the release() method.

WorkerKey workerKey = resources.getWorkerKey();
int availableWorkers = 0;
int activeWorkers = 0;
if (workerKey != null) {
availableWorkers = this.workerPool.getMaxTotalPerKey(workerKey);
activeWorkers = this.workerPool.getNumActive(workerKey);
int availableWorkers = this.workerPool.getMaxTotalPerKey(workerKey);
int activeWorkers = this.workerPool.getNumActive(workerKey);
if (activeWorkers >= availableWorkers) {
return false;
}
}
boolean workerIsAvailable = workerKey == null || (activeWorkers < availableWorkers);

// We test for tracking of extra resources whenever acquired and throw an
// exception before acquiring any untracked resource.
assertExtraResourcesTracked(resources);
assertResourcesTracked(resources);

if (usedCpu == 0.0
&& usedRam == 0.0
&& usedExtraResources.isEmpty()
&& usedLocalTestCount == 0
&& workerIsAvailable) {
if (usedResources.isEmpty() && usedLocalTestCount == 0) {
return true;
}
// Use only MIN_NECESSARY_???_RATIO of the resource value to check for
// allocation. This is necessary to account for the fact that most of the
// requested resource sets use pessimistic estimations. Note that this
// ratio is used only during comparison - for tracking we will actually
// mark whole requested amount as used.
double cpu = resources.getCpuUsage() * MIN_NECESSARY_CPU_RATIO;
double ram = resources.getMemoryMb() * MIN_NECESSARY_RAM_RATIO;
int localTestCount = resources.getLocalTestCount();

double availableCpu = availableResources.getCpuUsage();
double availableRam = availableResources.getMemoryMb();
int availableLocalTestCount = availableResources.getLocalTestCount();

double remainingRam = availableRam - usedRam;
int availableLocalTestCount = availableResources.getLocalTestCount();
if (!isAvailable(availableLocalTestCount, usedLocalTestCount, resources.getLocalTestCount())) {
return false;
}

// Resources are considered available if any one of the conditions below is true:
// 1) If resource is not requested at all, it is available.
// 2) If resource is not used at the moment, it is considered to be
// available regardless of how much is requested. This is necessary to
// ensure that at any given time, at least one thread is able to acquire
// resources even if it requests more than available.
// 3) If used resource amount is less than total available resource amount.
boolean cpuIsAvailable = cpu == 0.0 || usedCpu == 0.0 || usedCpu + cpu <= availableCpu;
boolean ramIsAvailable = ram == 0.0 || usedRam == 0.0 || ram <= remainingRam;
boolean localTestCountIsAvailable =
localTestCount == 0
|| usedLocalTestCount == 0
|| usedLocalTestCount + localTestCount <= availableLocalTestCount;
boolean extraResourcesIsAvailable = areExtraResourcesAvailable(resources);
return cpuIsAvailable
&& ramIsAvailable
&& extraResourcesIsAvailable
&& localTestCountIsAvailable
&& workerIsAvailable;
for (Map.Entry<String, Double> resource : resources.getResources().entrySet()) {
String key = resource.getKey();

// Use only MIN_NECESSARY_RATIO of the resource value to check for
// allocation. This is necessary to account for the fact that most of the
// requested resource sets use pessimistic estimations. Note that this
// ratio is used only during comparison - for tracking we will actually
// mark whole requested amount as used.
double requested = resource.getValue() * MIN_NECESSARY_RATIO.getOrDefault(key, 1.0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you define 1.0 in a static constant above together with MIN_NECESSARY_RATIO (e.g. DEFAULT_RATIO)? That'll make it clearer what happens if it's not specified in the MIN_NECESSARY_RATIO map.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

double used = usedResources.getOrDefault(key, 0.0);
double available = availableResources.get(key);
if (!isAvailable(available, used, requested)) {
return false;
}
}
return true;
}

@VisibleForTesting
Expand Down
Loading