Skip to content

Commit b29c86e

Browse files
author
Zhankun Tang
committed
YARN-6507. NM side FPGA resource handler abstraction component
1 parent d8bab94 commit b29c86e

File tree

11 files changed

+854
-1
lines changed

11 files changed

+854
-1
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1288,6 +1288,27 @@ public static boolean isAclEnabled(Configuration conf) {
12881288
public static final String NM_NETWORK_RESOURCE_OUTBOUND_BANDWIDTH_YARN_MBIT =
12891289
NM_NETWORK_RESOURCE_PREFIX + "outbound-bandwidth-yarn-mbit";
12901290

1291+
/**
1292+
* Prefix for FPGA configurations. Work in progress: This configuration
1293+
* parameter may be changed/removed in the future.
1294+
*/
1295+
@Private
1296+
public static final String NM_FPGA_RESOURCE_PREFIX = NM_PREFIX
1297+
+ "resource.fpga.";
1298+
1299+
/**
1300+
* This setting controls if resource handling for FPGA operations is enabled.
1301+
*/
1302+
@Private
1303+
public static final String NM_FPGA_RESOURCE_ENABLED =
1304+
NM_FPGA_RESOURCE_PREFIX + "enabled";
1305+
1306+
/**
1307+
* FPGA as a resource is disabled by default.
1308+
**/
1309+
@Private
1310+
public static final boolean DEFAULT_NM_FPGA_RESOURCE_ENABLED = false;
1311+
12911312
/** NM Webapp address.**/
12921313
public static final String NM_WEBAPP_ADDRESS = NM_PREFIX + "webapp.address";
12931314
public static final int DEFAULT_NM_WEBAPP_PORT = 8042;

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceUtils.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,31 @@ public static Map<String, ResourceInformation> getNodeResourceInformation(
392392
return readOnlyNodeResources;
393393
}
394394

395+
/**
396+
* Function to get the device allowed infomation. The value format should be comma separated majorNumber:minorNumber
397+
*
398+
* <property>
399+
* <name>yarn.nodemanager.resource-types.MCP.allowed</name>
400+
* <value>244:0,245:1</value>
401+
* </property>
402+
* @return a map of resource type and allowed value string
403+
* */
404+
public static Map<String, String> getResourceTypeAllowedValue(Configuration conf) {
405+
Map<String, String> allowedDevices = new HashMap<>();
406+
for (Map.Entry<String, String> entry : conf) {
407+
String key = entry.getKey();
408+
String value = entry.getValue();
409+
if (key.startsWith(YarnConfiguration.NM_RESOURCES_PREFIX)) {
410+
String[] parts = key.split("\\.");
411+
LOG.info("Found allowed device resource entry " + key);
412+
if (parts.length == 5 && parts[4].equalsIgnoreCase("allowed")) {
413+
allowedDevices.put(parts[3], value);
414+
}
415+
}
416+
}
417+
return allowedDevices;
418+
}
419+
395420
private static Map<String, ResourceInformation>
396421
initializeNodeResourceInformation(Configuration conf) {
397422
Map<String, ResourceInformation> nodeResources = new HashMap<>();

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ enum CGroupController {
3939
CPU("cpu"),
4040
NET_CLS("net_cls"),
4141
BLKIO("blkio"),
42-
MEMORY("memory");
42+
MEMORY("memory"),
43+
DEVICES("devices");
4344

4445
private final String name;
4546

@@ -65,6 +66,7 @@ String getName() {
6566
String CGROUP_CPU_QUOTA_US = "cfs_quota_us";
6667
String CGROUP_CPU_SHARES = "shares";
6768

69+
String CGROUP_PARAM_DEVICE_DENY = "deny";
6870
/**
6971
* Mounts or initializes a cgroup controller.
7072
* @param controller - the controller being initialized
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
20+
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.Fpga;
21+
22+
import org.apache.hadoop.classification.InterfaceAudience;
23+
import org.apache.hadoop.classification.InterfaceStability;
24+
import org.apache.hadoop.conf.Configuration;
25+
26+
import java.util.List;
27+
28+
29+
/**
30+
* FPGA plugin interface for vendor to implement
31+
*
32+
* */
33+
34+
@InterfaceAudience.Private
35+
@InterfaceStability.Unstable
36+
public interface AbstractFpgaPlugin {
37+
38+
boolean initPlugin(String s, Configuration configuration);
39+
40+
String getExistingIPID(int major, int minor);
41+
42+
String getFpgaType();
43+
44+
/**
45+
* the vendor should check if the IP file has already been downloaded
46+
* */
47+
String downloadIP(String id, String dstDir);
48+
49+
boolean configureIP(String ipPath, FpgaResourceAllocator.FpgaAllocation fpgaAllocations);
50+
51+
boolean cleanupFpgas(FpgaResourceAllocator.FpgaAllocation fpgaAllocations);
52+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
20+
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.Fpga;
21+
22+
import org.apache.hadoop.conf.Configuration;
23+
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
24+
25+
import java.util.ArrayList;
26+
import java.util.HashMap;
27+
import java.util.List;
28+
29+
public class FpgaPluginChain{
30+
31+
private HashMap<String, AbstractFpgaPlugin> plugins;
32+
33+
FpgaPluginChain(){}
34+
35+
FpgaPluginChain(HashMap<String, AbstractFpgaPlugin> plugins) {
36+
this.plugins = plugins;
37+
}
38+
39+
40+
public void addPlugin(AbstractFpgaPlugin plugin) {
41+
if (null == plugins) {
42+
plugins = new HashMap<>();
43+
}
44+
plugins.put(plugin.getFpgaType(), plugin);
45+
}
46+
47+
public AbstractFpgaPlugin getPlugin(String type) {
48+
return plugins.get(type);
49+
}
50+
51+
public List<AbstractFpgaPlugin> getPlugins() {
52+
return new ArrayList<>(plugins.values());
53+
}
54+
55+
public boolean initPlugin(String s, Configuration configuration) {
56+
for (AbstractFpgaPlugin plugin : plugins.values()) {
57+
if (!plugin.initPlugin(s,configuration)) {
58+
return false;
59+
}
60+
}
61+
return true;
62+
}
63+
64+
}

0 commit comments

Comments
 (0)