Skip to content

Commit 9b7e44c

Browse files
committed
YARN-11687. Update CGroupsResourceCalculator to track usages using cgroupv2
- port CGroupsResourceCalculator to V2
1 parent daafc8a commit 9b7e44c

File tree

2 files changed

+211
-0
lines changed

2 files changed

+211
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
20+
21+
import org.apache.commons.io.FileUtils;
22+
import org.apache.commons.lang3.StringUtils;
23+
24+
import org.slf4j.Logger;
25+
import org.slf4j.LoggerFactory;
26+
27+
import org.apache.hadoop.classification.VisibleForTesting;
28+
import org.apache.hadoop.util.CpuTimeTracker;
29+
import org.apache.hadoop.util.Shell;
30+
import org.apache.hadoop.util.SysInfoLinux;
31+
import org.apache.hadoop.yarn.exceptions.YarnException;
32+
import org.apache.hadoop.yarn.util.Clock;
33+
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
34+
import org.apache.hadoop.yarn.util.SystemClock;
35+
36+
import java.io.IOException;
37+
import java.math.BigInteger;
38+
import java.nio.charset.StandardCharsets;
39+
import java.nio.file.Files;
40+
import java.nio.file.Path;
41+
import java.nio.file.Paths;
42+
import java.util.List;
43+
import java.util.Map;
44+
import java.util.concurrent.ConcurrentHashMap;
45+
import java.util.stream.Collectors;
46+
import java.util.stream.Stream;
47+
48+
/**
49+
* A CGroupV2 file-system based Resource calculator without the process tree features.
50+
*
51+
* The feature only works if cluster runs in pure V2 version, because when we read the
52+
* /proc/{pid}/cgroup file currently we can not handle multiple lines.
53+
*/
54+
public class CGroupsV2ResourceCalculator extends ResourceCalculatorProcessTree {
55+
private static final Logger LOG = LoggerFactory.getLogger(CGroupsV2ResourceCalculator.class);
56+
private final Map<String, String> stats = new ConcurrentHashMap<>();
57+
private final Clock clock = SystemClock.getInstance();
58+
private final String pid;
59+
60+
@VisibleForTesting
61+
long jiffyLengthMs = SysInfoLinux.JIFFY_LENGTH_IN_MILLIS;
62+
@VisibleForTesting
63+
CpuTimeTracker cpuTimeTracker;
64+
65+
/**
66+
* Create resource calculator for the container that has the specified pid.
67+
* @param pid A pid from the cgroup or null for all containers
68+
*/
69+
public CGroupsV2ResourceCalculator(String pid) {
70+
super(pid);
71+
this.pid = pid;
72+
}
73+
74+
@Override
75+
public void initialize() throws YarnException {
76+
jiffyLengthMs = SysInfoLinux.JIFFY_LENGTH_IN_MILLIS;
77+
cpuTimeTracker = new CpuTimeTracker(jiffyLengthMs);
78+
}
79+
80+
@Override
81+
public float getCpuUsagePercent() {
82+
return cpuTimeTracker.getCpuTrackerUsagePercent();
83+
}
84+
85+
@Override
86+
public long getCumulativeCpuTime() {
87+
// https://docs.kernel.org/admin-guide/cgroup-v2.html#cpu-interface-files
88+
return jiffyLengthMs < 0
89+
? UNAVAILABLE
90+
: getStat("cpu.stat#usage_usec") * jiffyLengthMs;
91+
}
92+
93+
@Override
94+
public long getRssMemorySize(int olderThanAge) {
95+
// https://docs.kernel.org/admin-guide/cgroup-v2.html#memory-interface-files
96+
return 1 < olderThanAge
97+
? UNAVAILABLE
98+
: getStat("memory.stat#anon");
99+
}
100+
101+
@Override
102+
public long getVirtualMemorySize(int olderThanAge) {
103+
// https://docs.kernel.org/admin-guide/cgroup-v2.html#memory-interface-files
104+
return 1 < olderThanAge
105+
? UNAVAILABLE
106+
: getStat("memory.stat#vmalloc");
107+
}
108+
109+
@Override
110+
public String getProcessTreeDump() {
111+
// We do not have a process tree in cgroups return just the pid for tracking
112+
return pid;
113+
}
114+
115+
@Override
116+
public boolean checkPidPgrpidForMatch() {
117+
// We do not have a process tree in cgroups returning default ok
118+
return true;
119+
}
120+
121+
@Override
122+
public void updateProcessTree() {
123+
try (Stream<Path> cGroupFiles = Files.list(getCGroupPath())){
124+
List<Path> statFiles = cGroupFiles
125+
.filter(path -> path.endsWith(".stat"))
126+
.collect(Collectors.toList());
127+
for (Path statFile : statFiles) {
128+
String[] lines = fileToString(statFile).split(System.lineSeparator());
129+
for (String line: lines) {
130+
String[] parts = line.split(" ");
131+
stats.put(statFile.getFileName() + "#" + parts[0], parts[1]);
132+
}
133+
}
134+
cpuTimeTracker.updateElapsedJiffies(
135+
BigInteger.valueOf(getStat("cpu.stat#usage_usec")), clock.getTime());
136+
LOG.debug("The {} process has the following stat properties updated: {}", pid, stats);
137+
} catch (Exception e) {
138+
LOG.warn("Failed to read CGroupV2 stats for process: " + pid, e);
139+
}
140+
}
141+
142+
private Path getCGroupPath() throws IOException {
143+
String relativePath = pid == null
144+
? ResourceHandlerModule.getCGroupsHandler().getRelativePathForCGroup("")
145+
: getCGroupRelativePathForPid();
146+
return Paths.get(ResourceHandlerModule.getCGroupsHandler().getCGroupMountPath(), relativePath);
147+
}
148+
149+
private String getCGroupRelativePathForPid() throws IOException {
150+
// https://docs.kernel.org/admin-guide/cgroup-v2.html#processes
151+
String pidCGroupsFile = fileToString(Paths.get("/proc", pid, "cgroup"));
152+
String controllerPath = StringUtils.substringAfterLast(pidCGroupsFile, ":");
153+
return ResourceHandlerModule.getCGroupsHandler().getRelativePathForCGroup(controllerPath);
154+
}
155+
156+
private long getStat(String key) {
157+
return Long.parseLong(stats.getOrDefault(key, "0"));
158+
}
159+
160+
private String fileToString(Path path) throws IOException {
161+
return FileUtils.readFileToString(path.toFile(), StandardCharsets.UTF_8);
162+
}
163+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
20+
21+
import org.junit.Test;
22+
23+
import org.apache.hadoop.util.CpuTimeTracker;
24+
25+
import static org.mockito.ArgumentMatchers.any;
26+
import static org.mockito.ArgumentMatchers.anyLong;
27+
import static org.mockito.Mockito.mock;
28+
import static org.mockito.Mockito.never;
29+
import static org.mockito.Mockito.verify;
30+
31+
/**
32+
* Unit test for CGroupsV2ResourceCalculator.
33+
*/
34+
public class TestCGroupsV2ResourceCalculator {
35+
36+
@Test
37+
public void testPidNotFound() {
38+
CGroupsV2ResourceCalculator tested = createCalculator();
39+
tested.updateProcessTree();
40+
verify(tested.cpuTimeTracker, never()).updateElapsedJiffies(any(), anyLong());
41+
}
42+
43+
private CGroupsV2ResourceCalculator createCalculator() {
44+
CGroupsV2ResourceCalculator calculator = new CGroupsV2ResourceCalculator("1");
45+
calculator.cpuTimeTracker = mock(CpuTimeTracker.class);
46+
return calculator;
47+
}
48+
}

0 commit comments

Comments
 (0)