Skip to content

Commit d566e4e

Browse files
tomscut authored and tasanuma committed
HDFS-15892. Add metric for editPendingQ in FSEditLogAsync (#2770)
Signed-off-by: Takanobu Asanuma <tasanuma@apache.org> (cherry picked from commit 4bd0412)
1 parent 5458ebf commit d566e4e

File tree

3 files changed

+19
-0
lines changed

3 files changed

+19
-0
lines changed

hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -216,6 +216,7 @@ Each metrics record contains tags such as ProcessName, SessionId, and Hostname a
216216
| `EditLogTailIntervalNumOps` | Total number of intervals between edit log tailings by standby NameNode |
217217
| `EditLogTailIntervalAvgTime` | Average time of intervals between edit log tailings by standby NameNode in milliseconds |
218218
| `EditLogTailInterval`*num*`s(50/75/90/95/99)thPercentileLatency` | The 50/75/90/95/99th percentile of time between edit log tailings by standby NameNode in milliseconds (*num* seconds granularity). Percentile measurement is off by default, by watching no intervals. The intervals are specified by `dfs.metrics.percentiles.intervals`. |
219+
| `PendingEditsCount` | Current number of pending edits |
219220

220221
FSNamesystem
221222
------------

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogAsync.java

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,8 @@
2828
import java.util.concurrent.atomic.AtomicBoolean;
2929
import java.util.concurrent.atomic.AtomicInteger;
3030

31+
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
32+
import org.apache.hadoop.util.Time;
3133
import org.slf4j.Logger;
3234
import org.slf4j.LoggerFactory;
3335
import org.apache.hadoop.conf.Configuration;
@@ -53,6 +55,8 @@ class FSEditLogAsync extends FSEditLog implements Runnable {
5355
// of the edit log buffer - ie. a sync will eventually be forced.
5456
private final Deque<Edit> syncWaitQ = new ArrayDeque<Edit>();
5557

58+
private long lastFull = 0;
59+
5660
FSEditLogAsync(Configuration conf, NNStorage storage, List<URI> editsDirs) {
5761
super(conf, storage, editsDirs);
5862
// op instances cannot be shared due to queuing for background thread.
@@ -188,6 +192,11 @@ private void enqueueEdit(Edit edit) {
188192
if (!editPendingQ.offer(edit)) {
189193
Preconditions.checkState(
190194
isSyncThreadAlive(), "sync thread is not alive");
195+
long now = Time.monotonicNow();
196+
if (now - lastFull > 4000) {
197+
lastFull = now;
198+
LOG.info("Edit pending queue is full");
199+
}
191200
if (Thread.holdsLock(this)) {
192201
// if queue is full, synchronized caller must immediately relinquish
193202
// the monitor before re-offering to avoid deadlock with sync thread
@@ -225,15 +234,18 @@ private Edit dequeueEdit() throws InterruptedException {
225234
public void run() {
226235
try {
227236
while (true) {
237+
NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
228238
boolean doSync;
229239
Edit edit = dequeueEdit();
230240
if (edit != null) {
231241
// sync if requested by edit log.
232242
doSync = edit.logEdit();
233243
syncWaitQ.add(edit);
244+
metrics.setPendingEditsCount(editPendingQ.size() + 1);
234245
} else {
235246
// sync when editq runs dry, but have edits pending a sync.
236247
doSync = !syncWaitQ.isEmpty();
248+
metrics.setPendingEditsCount(0);
237249
}
238250
if (doSync) {
239251
// normally edit log exceptions cause the NN to terminate, but tests

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,8 @@ public class NameNodeMetrics {
8585
MutableGaugeInt blockOpsQueued;
8686
@Metric("Number of blockReports and blockReceivedAndDeleted batch processed")
8787
MutableCounterLong blockOpsBatched;
88+
@Metric("Number of pending edits")
89+
MutableGaugeInt pendingEditsCount;
8890

8991
@Metric("Number of file system operations")
9092
public long totalFileOps(){
@@ -334,6 +336,10 @@ public void addBlockOpsBatched(int count) {
334336
blockOpsBatched.incr(count);
335337
}
336338

339+
public void setPendingEditsCount(int size) {
340+
pendingEditsCount.set(size);
341+
}
342+
337343
public void addTransaction(long latency) {
338344
transactions.add(latency);
339345
}

0 commit comments

Comments
 (0)