Skip to content

Commit 6529085

Browse files
authored
YARN-11588. [Federation] [Addendum] Fix uncleaned threads in yarn router thread pool executor. (#6222)
1 parent 821ed83 commit 6529085

File tree

4 files changed

+53
-2
lines changed

4 files changed

+53
-2
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4369,6 +4369,22 @@ public static boolean isAclEnabled(Configuration conf) {
43694369
public static final long DEFAULT_ROUTER_USER_CLIENT_THREAD_POOL_KEEP_ALIVE_TIME =
43704370
TimeUnit.SECONDS.toMillis(0); // 0s
43714371

4372+
/**
4373+
* This method configures the policy for core threads regarding termination
4374+
* when no tasks arrive within the keep-alive time.
4375+
* When set to false, core threads are never terminated due to a lack of tasks.
4376+
* When set to true, the same keep-alive policy
4377+
* that applies to non-core threads also applies to core threads.
4378+
* To prevent constant thread replacement,
4379+
* ensure that the keep-alive time is greater than zero when setting it to true.
4380+
* It's advisable to call this method before the pool becomes actively used.
4381+
*/
4382+
public static final String ROUTER_USER_CLIENT_THREAD_POOL_ALLOW_CORE_THREAD_TIMEOUT =
4383+
ROUTER_PREFIX + "interceptor.user-thread-pool.allow-core-thread-time-out";
4384+
4385+
public static final boolean DEFAULT_ROUTER_USER_CLIENT_THREAD_POOL_ALLOW_CORE_THREAD_TIMEOUT =
4386+
false;
4387+
43724388
/** The address of the Router web application. */
43734389
public static final String ROUTER_WEBAPP_ADDRESS =
43744390
ROUTER_WEBAPP_PREFIX + "address";

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5139,6 +5139,23 @@
51395139
</description>
51405140
</property>
51415141

5142+
<property>
5143+
<name>yarn.router.interceptor.user-thread-pool.allow-core-thread-time-out</name>
5144+
<value>false</value>
5145+
<description>
5146+
This method configures the policy for core threads regarding termination
5147+
when no tasks arrive within the keep-alive time.
5148+
When set to false, core threads are never terminated due to a lack of tasks.
5149+
When set to true, the same keep-alive policy
5150+
that applies to non-core threads also applies to core threads.
5151+
To prevent constant thread replacement,
5152+
ensure that the keep-alive time is greater than zero when setting it to true.
5153+
It's advisable to call this method before the pool becomes actively used.
5154+
We need to ensure that
5155+
yarn.router.interceptor.user-thread-pool.keep-alive-time is greater than 0.
5156+
</description>
5157+
</property>
5158+
51425159
<property>
51435160
<name>yarn.router.submit.interval.time</name>
51445161
<value>10ms</value>

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/clientrm/FederationClientInterceptor.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,13 @@ public void init(String userName) {
231231
keepAliveTime, TimeUnit.MILLISECONDS, workQueue, threadFactory);
232232

233233
// Adding this line so that unused user threads will exit and be cleaned up if idle for too long
234-
this.executorService.allowCoreThreadTimeOut(true);
234+
boolean allowCoreThreadTimeOut = getConf().getBoolean(
235+
YarnConfiguration.ROUTER_USER_CLIENT_THREAD_POOL_ALLOW_CORE_THREAD_TIMEOUT,
236+
YarnConfiguration.DEFAULT_ROUTER_USER_CLIENT_THREAD_POOL_ALLOW_CORE_THREAD_TIMEOUT);
237+
238+
if (keepAliveTime > 0 && allowCoreThreadTimeOut) {
239+
this.executorService.allowCoreThreadTimeOut(allowCoreThreadTimeOut);
240+
}
235241

236242
final Configuration conf = this.getConf();
237243

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/rmadmin/FederationRMAdminInterceptor.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,21 @@ public void init(String userName) {
130130
ThreadFactory threadFactory = new ThreadFactoryBuilder()
131131
.setNameFormat("RPC Router RMAdminClient-" + userName + "-%d ").build();
132132

133+
long keepAliveTime = getConf().getTimeDuration(
134+
YarnConfiguration.ROUTER_USER_CLIENT_THREAD_POOL_KEEP_ALIVE_TIME,
135+
YarnConfiguration.DEFAULT_ROUTER_USER_CLIENT_THREAD_POOL_KEEP_ALIVE_TIME, TimeUnit.SECONDS);
136+
133137
BlockingQueue<Runnable> workQueue = new LinkedBlockingQueue<>();
134138
this.executorService = new ThreadPoolExecutor(numThreads, numThreads,
135-
0L, TimeUnit.MILLISECONDS, workQueue, threadFactory);
139+
keepAliveTime, TimeUnit.MILLISECONDS, workQueue, threadFactory);
140+
141+
boolean allowCoreThreadTimeOut = getConf().getBoolean(
142+
YarnConfiguration.ROUTER_USER_CLIENT_THREAD_POOL_ALLOW_CORE_THREAD_TIMEOUT,
143+
YarnConfiguration.DEFAULT_ROUTER_USER_CLIENT_THREAD_POOL_ALLOW_CORE_THREAD_TIMEOUT);
144+
145+
if (keepAliveTime > 0 && allowCoreThreadTimeOut) {
146+
this.executorService.allowCoreThreadTimeOut(allowCoreThreadTimeOut);
147+
}
136148

137149
federationFacade = FederationStateStoreFacade.getInstance(this.getConf());
138150
this.conf = this.getConf();

0 commit comments

Comments
 (0)