Skip to content

Commit 3859fa7

Browse files
committed
YARN-6924. Metrics for Federation AMRMProxy. Contributed by Young Chen
1 parent 69faaa1 commit 3859fa7

File tree

3 files changed

+460
-55
lines changed

3 files changed

+460
-55
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.yarn.server.nodemanager.amrmproxy;
20+
21+
import com.google.common.annotations.VisibleForTesting;
22+
import org.apache.hadoop.metrics2.MetricsInfo;
23+
import org.apache.hadoop.metrics2.annotation.Metric;
24+
import org.apache.hadoop.metrics2.annotation.Metrics;
25+
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
26+
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
27+
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
28+
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
29+
import org.apache.hadoop.metrics2.lib.MutableRate;
30+
31+
import static org.apache.hadoop.metrics2.lib.Interns.info;
32+
33+
@Metrics(about = "Metrics for AMRMProxy", context = "fedr")
34+
public final class AMRMProxyMetrics {
35+
36+
private static final MetricsInfo RECORD_INFO =
37+
info("AMRMProxyMetrics", "Metrics for the AMRMProxy");
38+
@Metric("# of failed applications start requests")
39+
private MutableGaugeLong failedAppStartRequests;
40+
@Metric("# of failed register AM requests")
41+
private MutableGaugeLong failedRegisterAMRequests;
42+
@Metric("# of failed finish AM requests")
43+
private MutableGaugeLong failedFinishAMRequests;
44+
@Metric("# of failed allocate requests ")
45+
private MutableGaugeLong failedAllocateRequests;
46+
@Metric("# of failed application recoveries")
47+
private MutableGaugeLong failedAppRecoveryCount;
48+
// Aggregate metrics are shared, and don't have to be looked up per call
49+
@Metric("Application start request latency(ms)")
50+
private MutableRate totalSucceededAppStartRequests;
51+
@Metric("Register application master latency(ms)")
52+
private MutableRate totalSucceededRegisterAMRequests;
53+
@Metric("Finish application master latency(ms)")
54+
private MutableRate totalSucceededFinishAMRequests;
55+
@Metric("Allocate latency(ms)")
56+
private MutableRate totalSucceededAllocateRequests;
57+
// Quantile latency in ms - this is needed for SLA (95%, 99%, etc)
58+
private MutableQuantiles applicationStartLatency;
59+
private MutableQuantiles registerAMLatency;
60+
private MutableQuantiles finishAMLatency;
61+
private MutableQuantiles allocateLatency;
62+
private static volatile AMRMProxyMetrics instance = null;
63+
private MetricsRegistry registry;
64+
65+
private AMRMProxyMetrics() {
66+
registry = new MetricsRegistry(RECORD_INFO);
67+
registry.tag(RECORD_INFO, "AMRMProxy");
68+
69+
applicationStartLatency = registry
70+
.newQuantiles("applicationStartLatency", "latency of app start", "ops",
71+
"latency", 10);
72+
registerAMLatency = registry
73+
.newQuantiles("registerAMLatency", "latency of register AM", "ops",
74+
"latency", 10);
75+
finishAMLatency = registry
76+
.newQuantiles("finishAMLatency", "latency of finish AM", "ops",
77+
"latency", 10);
78+
allocateLatency = registry
79+
.newQuantiles("allocateLatency", "latency of allocate", "ops",
80+
"latency", 10);
81+
}
82+
83+
/**
84+
* Initialize the singleton instance.
85+
*
86+
* @return the singleton
87+
*/
88+
public static AMRMProxyMetrics getMetrics() {
89+
synchronized (AMRMProxyMetrics.class) {
90+
if (instance == null) {
91+
instance = DefaultMetricsSystem.instance()
92+
.register("AMRMProxyMetrics", "Metrics for the Yarn AMRMProxy",
93+
new AMRMProxyMetrics());
94+
}
95+
}
96+
return instance;
97+
}
98+
99+
@VisibleForTesting
100+
long getNumSucceededAppStartRequests() {
101+
return totalSucceededAppStartRequests.lastStat().numSamples();
102+
}
103+
104+
@VisibleForTesting
105+
double getLatencySucceededAppStartRequests() {
106+
return totalSucceededAppStartRequests.lastStat().mean();
107+
}
108+
109+
public void succeededAppStartRequests(long duration) {
110+
totalSucceededAppStartRequests.add(duration);
111+
applicationStartLatency.add(duration);
112+
}
113+
114+
@VisibleForTesting
115+
long getNumSucceededRegisterAMRequests() {
116+
return totalSucceededRegisterAMRequests.lastStat().numSamples();
117+
}
118+
119+
@VisibleForTesting
120+
double getLatencySucceededRegisterAMRequests() {
121+
return totalSucceededRegisterAMRequests.lastStat().mean();
122+
}
123+
124+
public void succeededRegisterAMRequests(long duration) {
125+
totalSucceededRegisterAMRequests.add(duration);
126+
registerAMLatency.add(duration);
127+
}
128+
129+
@VisibleForTesting
130+
long getNumSucceededFinishAMRequests() {
131+
return totalSucceededFinishAMRequests.lastStat().numSamples();
132+
}
133+
134+
@VisibleForTesting
135+
double getLatencySucceededFinishAMRequests() {
136+
return totalSucceededFinishAMRequests.lastStat().mean();
137+
}
138+
139+
public void succeededFinishAMRequests(long duration) {
140+
totalSucceededFinishAMRequests.add(duration);
141+
finishAMLatency.add(duration);
142+
}
143+
144+
@VisibleForTesting
145+
long getNumSucceededAllocateRequests() {
146+
return totalSucceededAllocateRequests.lastStat().numSamples();
147+
}
148+
149+
@VisibleForTesting
150+
double getLatencySucceededAllocateRequests() {
151+
return totalSucceededAllocateRequests.lastStat().mean();
152+
}
153+
154+
public void succeededAllocateRequests(long duration) {
155+
totalSucceededAllocateRequests.add(duration);
156+
allocateLatency.add(duration);
157+
}
158+
159+
long getFailedAppStartRequests() {
160+
return failedAppStartRequests.value();
161+
}
162+
163+
public void incrFailedAppStartRequests() {
164+
failedAppStartRequests.incr();
165+
}
166+
167+
long getFailedRegisterAMRequests() {
168+
return failedRegisterAMRequests.value();
169+
}
170+
171+
public void incrFailedRegisterAMRequests() {
172+
failedRegisterAMRequests.incr();
173+
}
174+
175+
long getFailedFinishAMRequests() {
176+
return failedFinishAMRequests.value();
177+
}
178+
179+
public void incrFailedFinishAMRequests() {
180+
failedFinishAMRequests.incr();
181+
}
182+
183+
long getFailedAllocateRequests() {
184+
return failedAllocateRequests.value();
185+
}
186+
187+
public void incrFailedAllocateRequests() {
188+
failedAllocateRequests.incr();
189+
}
190+
191+
long getFailedAppRecoveryCount() {
192+
return failedAppRecoveryCount.value();
193+
}
194+
195+
public void incrFailedAppRecoveryCount() {
196+
failedAppRecoveryCount.incr();
197+
}
198+
}

0 commit comments

Comments
 (0)