Skip to content

Commit 507d320

Browse files
committed
1 parent daafc8a commit 507d320

File tree

1 file changed

+168
-0
lines changed
  • hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources

1 file changed

+168
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
20+
21+
import org.apache.commons.io.FileUtils;
22+
import org.apache.commons.lang3.StringUtils;
23+
24+
import org.slf4j.Logger;
25+
import org.slf4j.LoggerFactory;
26+
import org.apache.hadoop.util.CpuTimeTracker;
27+
import org.apache.hadoop.util.Shell;
28+
import org.apache.hadoop.util.SysInfoLinux;
29+
import org.apache.hadoop.yarn.exceptions.YarnException;
30+
import org.apache.hadoop.yarn.util.Clock;
31+
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
32+
import org.apache.hadoop.yarn.util.SystemClock;
33+
34+
import java.io.File;
35+
import java.io.IOException;
36+
import java.math.BigInteger;
37+
import java.nio.charset.StandardCharsets;
38+
import java.nio.file.Files;
39+
import java.nio.file.Path;
40+
import java.nio.file.Paths;
41+
import java.util.List;
42+
import java.util.Map;
43+
import java.util.concurrent.ConcurrentHashMap;
44+
import java.util.stream.Collectors;
45+
import java.util.stream.Stream;
46+
47+
/**
48+
* A CGroupV2 file-system based Resource calculator without the process tree features.
49+
*
50+
* The feature only works if cluster runs in pure V2 version, because when we read the
51+
* /proc/{pid}/cgroup file currently we can not handle multiple lines.
52+
*
53+
* Another limitation is virtual memory measurement. CGroups does not have the
54+
* ability to measure virtual memory usage. This includes memory reserved but
55+
* not used. CGroups measures used memory as sa sum of
56+
* physical memory and swap usage. This will be returned in the virtual
57+
* memory counters.
58+
* If the real virtual memory is required please use the legacy procfs based
59+
* resource calculator or CombinedResourceCalculator.
60+
*/
61+
public class CGroupsV2ResourceCalculator extends ResourceCalculatorProcessTree {
62+
private static final Logger LOG = LoggerFactory.getLogger(CGroupsV2ResourceCalculator.class);
63+
private final Map<String, String> stats = new ConcurrentHashMap<>();
64+
private final Clock clock = SystemClock.getInstance();
65+
private final String pid;
66+
67+
private long jiffyLengthMs = SysInfoLinux.JIFFY_LENGTH_IN_MILLIS;
68+
private CpuTimeTracker cpuTimeTracker;
69+
70+
/**
71+
* Create resource calculator for the container that has the specified pid.
72+
* @param pid A pid from the cgroup or null for all containers
73+
*/
74+
public CGroupsV2ResourceCalculator(String pid) {
75+
super(pid);
76+
this.pid = pid;
77+
}
78+
79+
@Override
80+
public void initialize() throws YarnException {
81+
jiffyLengthMs = SysInfoLinux.JIFFY_LENGTH_IN_MILLIS;
82+
cpuTimeTracker = new CpuTimeTracker(jiffyLengthMs);
83+
}
84+
85+
@Override
86+
public float getCpuUsagePercent() {
87+
return cpuTimeTracker.getCpuTrackerUsagePercent();
88+
}
89+
90+
@Override
91+
public long getCumulativeCpuTime() {
92+
// https://docs.kernel.org/admin-guide/cgroup-v2.html#cpu-interface-files
93+
return jiffyLengthMs < 0
94+
? UNAVAILABLE
95+
: getStat("cpu.stat#usage_usec") * jiffyLengthMs;
96+
}
97+
98+
@Override
99+
public long getRssMemorySize(int olderThanAge) {
100+
// https://docs.kernel.org/admin-guide/cgroup-v2.html#memory-interface-files
101+
return 1 < olderThanAge
102+
? UNAVAILABLE
103+
: getStat("memory.stat#anon");
104+
}
105+
106+
@Override
107+
public long getVirtualMemorySize(int olderThanAge) {
108+
// https://docs.kernel.org/admin-guide/cgroup-v2.html#memory-interface-files
109+
return 1 < olderThanAge
110+
? UNAVAILABLE
111+
: getStat("memory.stat#vmalloc");
112+
}
113+
114+
@Override
115+
public String getProcessTreeDump() {
116+
// We do not have a process tree in cgroups return just the pid for tracking
117+
return pid;
118+
}
119+
120+
@Override
121+
public boolean checkPidPgrpidForMatch() {
122+
// We do not have a process tree in cgroups returning default ok
123+
return true;
124+
}
125+
126+
@Override
127+
public void updateProcessTree() {
128+
try (Stream<Path> cGroupFiles = Files.list(getCGroupPath())){
129+
List<Path> statFiles = cGroupFiles
130+
.filter(path -> path.endsWith(".stat"))
131+
.collect(Collectors.toList());
132+
for (Path statFile : statFiles) {
133+
String[] lines = fileToString(statFile.toString()).split(System.lineSeparator());
134+
for (String line: lines) {
135+
String[] parts = line.split(" ");
136+
stats.put(statFile.getFileName() + "#" + parts[0], parts[1]);
137+
}
138+
}
139+
cpuTimeTracker.updateElapsedJiffies(
140+
BigInteger.valueOf(getStat("cpu.stat#usage_usec")), clock.getTime());
141+
LOG.debug("The {} process has the following stat properties updated: {}", pid, stats);
142+
} catch (IOException e) {
143+
LOG.warn("Failed to read CGroupV2 stats for process: " + pid, e);
144+
}
145+
}
146+
147+
private Path getCGroupPath() throws IOException {
148+
String relativePath = pid == null
149+
? ResourceHandlerModule.getCGroupsHandler().getRelativePathForCGroup("")
150+
: getCGroupRelativePathForPid();
151+
return Paths.get(ResourceHandlerModule.getCGroupsHandler().getCGroupMountPath(), relativePath);
152+
}
153+
154+
private String getCGroupRelativePathForPid() throws IOException {
155+
// https://docs.kernel.org/admin-guide/cgroup-v2.html#processes
156+
String pidCGroupsFile = fileToString(Paths.get("/proc", pid, "cgroup").toString());
157+
String controllerPath = StringUtils.substringAfterLast(pidCGroupsFile, ":");
158+
return ResourceHandlerModule.getCGroupsHandler().getRelativePathForCGroup(controllerPath);
159+
}
160+
161+
private long getStat(String key) {
162+
return Long.parseLong(stats.getOrDefault(key, "0"));
163+
}
164+
165+
private String fileToString(String path) throws IOException {
166+
return FileUtils.readFileToString(new File(path), StandardCharsets.UTF_8);
167+
}
168+
}

0 commit comments

Comments
 (0)