Skip to content

Commit 129d91f

Browse files
authored
YARN-11692. Support mixed cgroup v1/v2 controller structure (#6821)
1 parent cfdf1f5 commit 129d91f

File tree

7 files changed

+237
-117
lines changed

7 files changed

+237
-117
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2733,6 +2733,10 @@ public static boolean isAclEnabled(Configuration conf) {
27332733
public static final String NM_LINUX_CONTAINER_CGROUPS_MOUNT_PATH =
27342734
NM_PREFIX + "linux-container-executor.cgroups.mount-path";
27352735

2736+
/** Where the linux container executor should mount cgroups v2 if not found. */
2737+
public static final String NM_LINUX_CONTAINER_CGROUPS_V2_MOUNT_PATH =
2738+
NM_PREFIX + "linux-container-executor.cgroups.v2.mount-path";
2739+
27362740
/**
27372741
* Whether the apps should run in strict resource usage mode(not allowed to
27382742
* use spare CPU)

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2087,6 +2087,20 @@
20872087
<name>yarn.nodemanager.linux-container-executor.cgroups.mount-path</name>
20882088
</property>
20892089

2090+
<property>
2091+
<description>This property sets the mount path for CGroups v2.
2092+
This parameter is optional and only needs to be set in mixed mode,
2093+
when CGroups v2 is mounted alongside CGroups v1.
2094+
For example, in hybrid mode, CGroups v1 controllers can be mounted under /sys/fs/cgroup/
2095+
(for example /sys/fs/cgroup/cpu,cpuacct), while v2 can be mounted in the /sys/fs/cgroup/unified folder.
2096+
2097+
If this value is not set, the value of
2098+
yarn.nodemanager.linux-container-executor.cgroups.mount-path
2099+
will be used for CGroups v2 as well.
2100+
</description>
2101+
<name>yarn.nodemanager.linux-container-executor.cgroups.v2.mount-path</name>
2102+
</property>
2103+
20902104
<property>
20912105
<description>Delay in ms between attempts to remove linux cgroup</description>
20922106
<name>yarn.nodemanager.linux-container-executor.cgroups.delete-delay-ms</name>

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsMountConfig.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,17 @@ public class CGroupsMountConfig {
2626
private final boolean enableMount;
2727
private final String mountPath;
2828

29+
// CGroups v2 mount path is only relevant in mixed CGroups v1/v2 mode,
30+
// where v2 is mounted alongside v1.
31+
private final String v2MountPath;
32+
2933
public CGroupsMountConfig(Configuration conf) {
3034
this.enableMount = conf.getBoolean(YarnConfiguration.
3135
NM_LINUX_CONTAINER_CGROUPS_MOUNT, false);
3236
this.mountPath = conf.get(YarnConfiguration.
3337
NM_LINUX_CONTAINER_CGROUPS_MOUNT_PATH, null);
38+
this.v2MountPath = conf.get(YarnConfiguration.
39+
NM_LINUX_CONTAINER_CGROUPS_V2_MOUNT_PATH, mountPath);
3440
}
3541

3642
public boolean ensureMountPathIsDefined() throws ResourceHandlerException {
@@ -62,11 +68,16 @@ public String getMountPath() {
6268
return mountPath;
6369
}
6470

71+
public String getV2MountPath() {
72+
return v2MountPath;
73+
}
74+
6575
@Override
6676
public String toString() {
6777
return "CGroupsMountConfig{" +
6878
"enableMount=" + enableMount +
69-
", mountPath='" + mountPath + '\'' +
79+
", mountPath='" + mountPath +
80+
", v2MountPath='" + v2MountPath + '\'' +
7081
'}';
7182
}
7283
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2HandlerImpl.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,8 @@ protected List<CGroupController> getCGroupControllers() {
9797
@Override
9898
protected Map<String, Set<String>> parsePreConfiguredMountPath() throws IOException {
9999
Map<String, Set<String>> controllerMappings = new HashMap<>();
100-
controllerMappings.put(this.cGroupsMountConfig.getMountPath(),
101-
readControllersFile(this.cGroupsMountConfig.getMountPath()));
100+
controllerMappings.put(this.cGroupsMountConfig.getV2MountPath(),
101+
readControllersFile(this.cGroupsMountConfig.getV2MountPath()));
102102
return controllerMappings;
103103
}
104104

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/ResourceHandlerModule.java

Lines changed: 60 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -63,35 +63,55 @@ public class ResourceHandlerModule {
6363
* as resource metrics functionality. We need to ensure that the same
6464
* instance is used for both.
6565
*/
66+
private static volatile CGroupsHandler cGroupV1Handler;
67+
private static volatile CGroupsHandler cGroupV2Handler;
6668
private static volatile TrafficControlBandwidthHandlerImpl
6769
trafficControlBandwidthHandler;
6870
private static volatile NetworkPacketTaggingHandlerImpl
6971
networkPacketTaggingHandlerImpl;
70-
private static volatile CGroupsHandler cGroupsHandler;
7172
private static volatile CGroupsBlkioResourceHandlerImpl
7273
cGroupsBlkioResourceHandler;
7374
private static volatile MemoryResourceHandler
7475
cGroupsMemoryResourceHandler;
7576
private static volatile CpuResourceHandler
7677
cGroupsCpuResourceHandler;
7778

78-
/**
79-
* Returns an initialized, thread-safe CGroupsHandler instance.
80-
*/
81-
private static CGroupsHandler getInitializedCGroupsHandler(Configuration conf)
79+
private static void initializeCGroupHandlers(Configuration conf)
80+
throws ResourceHandlerException {
81+
initializeCGroupV1Handler(conf);
82+
if (cgroupsV2Enabled) {
83+
initializeCGroupV2Handler(conf);
84+
}
85+
}
86+
87+
private static void initializeCGroupV1Handler(Configuration conf)
8288
throws ResourceHandlerException {
83-
if (cGroupsHandler == null) {
89+
if (cGroupV1Handler == null) {
8490
synchronized (CGroupsHandler.class) {
85-
if (cGroupsHandler == null) {
86-
cGroupsHandler = cgroupsV2Enabled
87-
? new CGroupsV2HandlerImpl(conf, PrivilegedOperationExecutor.getInstance(conf))
88-
: new CGroupsHandlerImpl(conf, PrivilegedOperationExecutor.getInstance(conf));
89-
LOG.debug("Value of CGroupsHandler is: {}", cGroupsHandler);
91+
if (cGroupV1Handler == null) {
92+
cGroupV1Handler = new CGroupsHandlerImpl(
93+
conf, PrivilegedOperationExecutor.getInstance(conf));
94+
LOG.debug("Value of CGroupsV1Handler is: {}", cGroupV1Handler);
9095
}
9196
}
9297
}
98+
}
9399

94-
return cGroupsHandler;
100+
private static void initializeCGroupV2Handler(Configuration conf)
101+
throws ResourceHandlerException {
102+
if (cGroupV2Handler == null) {
103+
synchronized (CGroupsHandler.class) {
104+
if (cGroupV2Handler == null) {
105+
cGroupV2Handler = new CGroupsV2HandlerImpl(
106+
conf, PrivilegedOperationExecutor.getInstance(conf));
107+
LOG.debug("Value of CGroupsV2Handler is: {}", cGroupV2Handler);
108+
}
109+
}
110+
}
111+
}
112+
113+
private static boolean isMountedInCGroupsV2(CGroupsHandler.CGroupController controller) {
114+
return (cGroupV2Handler != null && cGroupV2Handler.getControllerPath(controller) != null);
95115
}
96116

97117
/**
@@ -101,18 +121,18 @@ private static CGroupsHandler getInitializedCGroupsHandler(Configuration conf)
101121
*/
102122

103123
public static CGroupsHandler getCGroupsHandler() {
104-
return cGroupsHandler;
124+
return cGroupV1Handler;
105125
}
106126

107127
/**
108128
* Returns relative root for cgroups. Returns null if the CGroupsHandler is
109129
* not initialized, or if the path is empty.
110130
*/
111131
public static String getCgroupsRelativeRoot() {
112-
if (cGroupsHandler == null) {
132+
if (getCGroupsHandler() == null) {
113133
return null;
114134
}
115-
String cGroupPath = cGroupsHandler.getRelativePathForCGroup("");
135+
String cGroupPath = getCGroupsHandler().getRelativePathForCGroup("");
116136
if (cGroupPath == null || cGroupPath.isEmpty()) {
117137
return null;
118138
}
@@ -153,9 +173,13 @@ private static CpuResourceHandler initCGroupsCpuResourceHandler(
153173
synchronized (CpuResourceHandler.class) {
154174
if (cGroupsCpuResourceHandler == null) {
155175
LOG.debug("Creating new cgroups cpu handler");
156-
cGroupsCpuResourceHandler = cgroupsV2Enabled
157-
? new CGroupsV2CpuResourceHandlerImpl(getInitializedCGroupsHandler(conf))
158-
: new CGroupsCpuResourceHandlerImpl(getInitializedCGroupsHandler(conf));
176+
177+
initializeCGroupHandlers(conf);
178+
if (isMountedInCGroupsV2(CGroupsHandler.CGroupController.CPU)) {
179+
cGroupsCpuResourceHandler = new CGroupsV2CpuResourceHandlerImpl(cGroupV2Handler);
180+
} else {
181+
cGroupsCpuResourceHandler = new CGroupsCpuResourceHandlerImpl(cGroupV1Handler);
182+
}
159183
return cGroupsCpuResourceHandler;
160184
}
161185
}
@@ -173,9 +197,11 @@ private static CpuResourceHandler initCGroupsCpuResourceHandler(
173197
synchronized (OutboundBandwidthResourceHandler.class) {
174198
if (trafficControlBandwidthHandler == null) {
175199
LOG.info("Creating new traffic control bandwidth handler.");
200+
201+
initializeCGroupHandlers(conf);
176202
trafficControlBandwidthHandler = new
177203
TrafficControlBandwidthHandlerImpl(PrivilegedOperationExecutor
178-
.getInstance(conf), getInitializedCGroupsHandler(conf),
204+
.getInstance(conf), cGroupV1Handler,
179205
new TrafficController(conf, PrivilegedOperationExecutor
180206
.getInstance(conf)));
181207
}
@@ -208,10 +234,11 @@ public static ResourceHandler getNetworkTaggingHandler(Configuration conf)
208234
synchronized (OutboundBandwidthResourceHandler.class) {
209235
if (networkPacketTaggingHandlerImpl == null) {
210236
LOG.info("Creating new network-tagging-handler.");
237+
238+
initializeCGroupHandlers(conf);
211239
networkPacketTaggingHandlerImpl =
212240
new NetworkPacketTaggingHandlerImpl(
213-
PrivilegedOperationExecutor.getInstance(conf),
214-
getInitializedCGroupsHandler(conf));
241+
PrivilegedOperationExecutor.getInstance(conf), cGroupV1Handler);
215242
}
216243
}
217244
}
@@ -239,9 +266,10 @@ private static CGroupsBlkioResourceHandlerImpl getCgroupsBlkioResourceHandler(
239266
synchronized (DiskResourceHandler.class) {
240267
if (cGroupsBlkioResourceHandler == null) {
241268
LOG.debug("Creating new cgroups blkio handler");
269+
270+
initializeCGroupHandlers(conf);
242271
cGroupsBlkioResourceHandler =
243-
new CGroupsBlkioResourceHandlerImpl(
244-
getInitializedCGroupsHandler(conf));
272+
new CGroupsBlkioResourceHandlerImpl(cGroupV1Handler);
245273
}
246274
}
247275
}
@@ -263,9 +291,13 @@ public static MemoryResourceHandler initMemoryResourceHandler(
263291
if (cGroupsMemoryResourceHandler == null) {
264292
synchronized (MemoryResourceHandler.class) {
265293
if (cGroupsMemoryResourceHandler == null) {
266-
cGroupsMemoryResourceHandler = cgroupsV2Enabled
267-
? new CGroupsV2MemoryResourceHandlerImpl(getInitializedCGroupsHandler(conf))
268-
: new CGroupsMemoryResourceHandlerImpl(getInitializedCGroupsHandler(conf));
294+
295+
initializeCGroupHandlers(conf);
296+
if (isMountedInCGroupsV2(CGroupsHandler.CGroupController.MEMORY)) {
297+
cGroupsMemoryResourceHandler = new CGroupsV2MemoryResourceHandlerImpl(cGroupV2Handler);
298+
} else {
299+
cGroupsMemoryResourceHandler = new CGroupsMemoryResourceHandlerImpl(cGroupV1Handler);
300+
}
269301
}
270302
}
271303
}
@@ -327,9 +359,10 @@ private static void addHandlersFromConfiguredResourcePlugins(
327359
}
328360

329361
for (ResourcePlugin plugin : pluginMap.values()) {
362+
initializeCGroupHandlers(conf);
330363
addHandlerIfNotNull(handlerList,
331364
plugin.createResourceHandler(nmContext,
332-
getInitializedCGroupsHandler(conf),
365+
cGroupV1Handler,
333366
PrivilegedOperationExecutor.getInstance(conf)));
334367
}
335368
}
@@ -360,21 +393,6 @@ static void nullifyResourceHandlerChain() throws ResourceHandlerException {
360393
resourceHandlerChain = null;
361394
}
362395

363-
@VisibleForTesting
364-
static void resetCgroupsHandler() {
365-
cGroupsHandler = null;
366-
}
367-
368-
@VisibleForTesting
369-
static void resetCpuResourceHandler() {
370-
cGroupsCpuResourceHandler = null;
371-
}
372-
373-
@VisibleForTesting
374-
static void resetMemoryResourceHandler() {
375-
cGroupsMemoryResourceHandler = null;
376-
}
377-
378396
/**
379397
* If a cgroup mount directory is specified, it returns cgroup directories
380398
* with valid names.

0 commit comments

Comments
 (0)