Skip to content

Commit ccb8ff4

Browse files
K0K0V0K and brumi1024 authored
YARN-11687. CGroupV2 resource calculator (#6835)
Co-authored-by: Benjamin Teke <brumi1024@users.noreply.github.com>
1 parent 6c08e8e commit ccb8ff4

File tree

11 files changed

+710
-583
lines changed

11 files changed

+710
-583
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@
2020

2121
import java.lang.reflect.Constructor;
2222

23-
import org.slf4j.Logger;
24-
import org.slf4j.LoggerFactory;
2523
import org.apache.hadoop.classification.InterfaceAudience.Public;
2624
import org.apache.hadoop.classification.InterfaceStability.Evolving;
2725
import org.apache.hadoop.conf.Configuration;
@@ -37,8 +35,6 @@
3735
@Public
3836
@Evolving
3937
public abstract class ResourceCalculatorProcessTree extends Configured {
40-
static final Logger LOG = LoggerFactory
41-
.getLogger(ResourceCalculatorProcessTree.class);
4238
public static final int UNAVAILABLE = -1;
4339

4440
/**
@@ -169,7 +165,6 @@ public float getCpuUsagePercent() {
169165
*/
170166
public static ResourceCalculatorProcessTree getResourceCalculatorProcessTree(
171167
String pid, Class<? extends ResourceCalculatorProcessTree> clazz, Configuration conf) {
172-
173168
if (clazz != null) {
174169
try {
175170
Constructor <? extends ResourceCalculatorProcessTree> c = clazz.getConstructor(String.class);

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,11 @@ public String getCGroupMountPath() {
559559
return this.cGroupsMountConfig.getMountPath();
560560
}
561561

562+
// Returns the cgroup v2 mount path as reported by the mount configuration object.
@Override
563+
public String getCGroupV2MountPath() {
564+
return this.cGroupsMountConfig.getV2MountPath();
565+
}
566+
562567
@Override
563568
public String toString() {
564569
return CGroupsHandlerImpl.class.getName() + "{" +
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
20+
21+
import java.io.IOException;
22+
import java.math.BigInteger;
23+
import java.nio.charset.StandardCharsets;
24+
import java.nio.file.Path;
25+
import java.nio.file.Paths;
26+
import java.util.Arrays;
27+
import java.util.Collections;
28+
import java.util.List;
29+
import java.util.Map;
30+
import java.util.concurrent.ConcurrentHashMap;
31+
32+
import org.slf4j.Logger;
33+
import org.slf4j.LoggerFactory;
34+
35+
import org.apache.commons.io.FileUtils;
36+
import org.apache.hadoop.classification.VisibleForTesting;
37+
import org.apache.hadoop.util.CpuTimeTracker;
38+
import org.apache.hadoop.util.SysInfoLinux;
39+
import org.apache.hadoop.yarn.exceptions.YarnException;
40+
import org.apache.hadoop.yarn.util.Clock;
41+
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
42+
import org.apache.hadoop.yarn.util.SystemClock;
43+
44+
/**
45+
* Common code base for the CGroupsResourceCalculator implementations.
46+
*/
47+
public abstract class AbstractCGroupsResourceCalculator extends ResourceCalculatorProcessTree {
48+
private static final Logger LOG =
49+
LoggerFactory.getLogger(AbstractCGroupsResourceCalculator.class);
50+
private final String pid;
51+
private final Clock clock = SystemClock.getInstance();
52+
private final Map<String, String> stats = new ConcurrentHashMap<>();
53+
54+
private long jiffyLengthMs = SysInfoLinux.JIFFY_LENGTH_IN_MILLIS;
55+
private CpuTimeTracker cpuTimeTracker;
56+
private CGroupsHandler cGroupsHandler;
57+
private String procFs = "/proc";
58+
59+
private final List<String> totalJiffiesKeys;
60+
private final String rssMemoryKey;
61+
private final String virtualMemoryKey;
62+
63+
protected AbstractCGroupsResourceCalculator(
64+
String pid,
65+
List<String> totalJiffiesKeys,
66+
String rssMemoryKey,
67+
String virtualMemoryKey
68+
) {
69+
super(pid);
70+
this.pid = pid;
71+
this.totalJiffiesKeys = totalJiffiesKeys;
72+
this.rssMemoryKey = rssMemoryKey;
73+
this.virtualMemoryKey = virtualMemoryKey;
74+
}
75+
76+
@Override
77+
public void initialize() throws YarnException {
78+
cpuTimeTracker = new CpuTimeTracker(jiffyLengthMs);
79+
cGroupsHandler = ResourceHandlerModule.getCGroupsHandler();
80+
}
81+
82+
@Override
83+
public long getCumulativeCpuTime() {
84+
long totalJiffies = getTotalJiffies();
85+
return jiffyLengthMs == UNAVAILABLE || totalJiffies == UNAVAILABLE
86+
? UNAVAILABLE
87+
: getTotalJiffies() * jiffyLengthMs;
88+
}
89+
90+
@Override
91+
public long getRssMemorySize(int olderThanAge) {
92+
return 1 < olderThanAge ? UNAVAILABLE : getStat(rssMemoryKey);
93+
}
94+
95+
@Override
96+
public long getVirtualMemorySize(int olderThanAge) {
97+
return 1 < olderThanAge ? UNAVAILABLE : getStat(virtualMemoryKey);
98+
}
99+
100+
@Override
101+
public String getProcessTreeDump() {
102+
// We do not have a process tree in cgroups return just the pid for tracking
103+
return pid;
104+
}
105+
106+
@Override
107+
public boolean checkPidPgrpidForMatch() {
108+
// We do not have a process tree in cgroups returning default ok
109+
return true;
110+
}
111+
112+
@Override
113+
public float getCpuUsagePercent() {
114+
return cpuTimeTracker.getCpuTrackerUsagePercent();
115+
}
116+
117+
@Override
118+
public void updateProcessTree() {
119+
stats.clear();
120+
for (Path statFile : getCGroupFilesToLoadInStats()) {
121+
try {
122+
List<String> lines = fileToLines(statFile);
123+
if (1 == lines.size()) {
124+
addSingleLineToStat(statFile, lines.get(0));
125+
} else if (1 < lines.size()) {
126+
addMultiLineToStat(statFile, lines);
127+
}
128+
} catch (IOException e) {
129+
LOG.debug(String.format("Failed to read cgroup file %s for pid %s", statFile, pid), e);
130+
}
131+
}
132+
LOG.debug("After updateProcessTree the {} pid has stats {}", pid, stats);
133+
cpuTimeTracker.updateElapsedJiffies(BigInteger.valueOf(getTotalJiffies()), clock.getTime());
134+
}
135+
136+
private void addSingleLineToStat(Path file, String line) {
137+
Path fileName = file.getFileName();
138+
if (fileName != null) {
139+
stats.put(fileName.toString(), line.trim());
140+
}
141+
}
142+
143+
private void addMultiLineToStat(Path file, List<String> lines) {
144+
for (String line : lines) {
145+
String[] parts = line.split(" ");
146+
if (1 < parts.length) {
147+
stats.put(file.getFileName() + "#" + parts[0], parts[1]);
148+
}
149+
}
150+
}
151+
152+
private long getTotalJiffies() {
153+
Long reduce = totalJiffiesKeys.stream()
154+
.map(this::getStat)
155+
.filter(statValue -> statValue != UNAVAILABLE)
156+
.reduce(0L, Long::sum);
157+
return reduce == 0 ? UNAVAILABLE : reduce;
158+
}
159+
160+
private long getStat(String key) {
161+
return Long.parseLong(stats.getOrDefault(key, String.valueOf(UNAVAILABLE)));
162+
}
163+
164+
protected abstract List<Path> getCGroupFilesToLoadInStats();
165+
166+
protected List<String> readLinesFromCGroupFileFromProcDir() throws IOException {
167+
// https://docs.kernel.org/admin-guide/cgroup-v2.html#processes
168+
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/cgroups.html
169+
Path cgroup = Paths.get(procFs, pid, "cgroup");
170+
List<String> result = Arrays.asList(fileToString(cgroup).split(System.lineSeparator()));
171+
LOG.debug("The {} pid has the following lines in the procfs cgroup file {}", pid, result);
172+
return result;
173+
}
174+
175+
protected String fileToString(Path path) throws IOException {
176+
return FileUtils.readFileToString(path.toFile(), StandardCharsets.UTF_8).trim();
177+
}
178+
179+
protected List<String> fileToLines(Path path) throws IOException {
180+
return !path.toFile().exists() ? Collections.emptyList()
181+
: Arrays.asList(FileUtils.readFileToString(path.toFile(), StandardCharsets.UTF_8)
182+
.trim().split(System.lineSeparator()));
183+
}
184+
185+
@VisibleForTesting
186+
void setJiffyLengthMs(long jiffyLengthMs) {
187+
this.jiffyLengthMs = jiffyLengthMs;
188+
}
189+
190+
@VisibleForTesting
191+
void setCpuTimeTracker(CpuTimeTracker cpuTimeTracker) {
192+
this.cpuTimeTracker = cpuTimeTracker;
193+
}
194+
195+
@VisibleForTesting
196+
void setcGroupsHandler(CGroupsHandler cGroupsHandler) {
197+
this.cGroupsHandler = cGroupsHandler;
198+
}
199+
200+
@VisibleForTesting
201+
void setProcFs(String procFs) {
202+
this.procFs = procFs;
203+
}
204+
205+
public CGroupsHandler getcGroupsHandler() {
206+
return cGroupsHandler;
207+
}
208+
209+
public String getPid() {
210+
return pid;
211+
}
212+
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,4 +239,10 @@ String getCGroupParam(CGroupController controller, String cGroupId,
239239
* @return parameter value as read from the parameter file
240240
*/
241241
String getCGroupMountPath();
242+
243+
/**
244+
* Returns the CGroupV2 mount path.
245+
* @return the CGroupV2 mount path
246+
*/
247+
String getCGroupV2MountPath();
242248
}

0 commit comments

Comments
 (0)