Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HDDS-11779. Add DN metrics to show deletion progress #7552

Merged
merged 3 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ public final class BlockDeletingServiceMetrics {
@Metric(about = "The number of failed delete blocks.")
private MutableCounterLong failureCount;

// Counter bumped via incrProcessedTransactionCount(long) and read via
// getProcessedTransactionCount().
@Metric(about = "The number of delete block transaction processed.")
private MutableCounterLong processedTransactionCount;

@Metric(about = "The number of out of order delete block transaction.")
private MutableCounterLong outOfOrderDeleteBlockTransactionCount;

Expand All @@ -55,8 +58,7 @@ public final class BlockDeletingServiceMetrics {
@Metric(about = "The total number of DeleteBlockTransaction received")
private MutableCounterLong receivedTransactionCount;

@Metric(about = "The total number of DeleteBlockTransaction" +
" that is a retry Transaction")
@Metric(about = "The total number of DeleteBlockTransaction that is a retry Transaction")
private MutableCounterLong receivedRetryTransactionCount;

@Metric(about = "The total number of Container received to be processed")
Expand All @@ -74,10 +76,20 @@ public final class BlockDeletingServiceMetrics {
@Metric(about = "The total number of Container chosen to be deleted.")
private MutableGaugeLong totalContainerChosenCount;

@Metric(about = "The total number of transactions which failed due" +
" to container lock wait timeout.")
@Metric(about = "The total number of transactions which failed due to container lock wait timeout.")
private MutableGaugeLong totalLockTimeoutTransactionCount;

// NOTE(review): the three metrics below are declared as gauges, yet the
// visible code only ever increments them via incr(delta) — confirm whether
// a counter type was intended.

// Updated via incrTotalCommandsReceived(long).
@Metric(about = "The total number of deletion commands received.")
private MutableGaugeLong totalCommandsReceived;

// Updated via incrTotalCommandsDiscarded(long).
@Metric(about = "The total number of deletion commands that were discarded " +
"due to the queue being full.")
private MutableGaugeLong totalCommandsDiscarded;

// Updated via incrTotalTransactionsDiscarded(long).
@Metric(about = "The total number of deletion transactions that were discarded " +
"due to the transaction being a duplicate.")
private MutableGaugeLong totalTransactionsDiscarded;

private BlockDeletingServiceMetrics() {
}

Expand Down Expand Up @@ -112,6 +124,10 @@ public void incrFailureCount() {
this.failureCount.incr();
}

/**
 * Adds {@code count} to the {@code processedTransactionCount} metric.
 *
 * @param count amount by which the counter is increased
 */
public void incrProcessedTransactionCount(long count) {
  this.processedTransactionCount.incr(count);
}

/**
 * Adds {@code count} to the {@code receivedTransactionCount} metric.
 *
 * @param count amount by which the counter is increased
 */
public void incrReceivedTransactionCount(long count) {
  this.receivedTransactionCount.incr(count);
}
Expand Down Expand Up @@ -148,6 +164,18 @@ public void incrTotalLockTimeoutTransactionCount() {
totalLockTimeoutTransactionCount.incr();
}

/**
 * Raises the {@code totalCommandsReceived} metric by {@code delta}.
 *
 * @param delta amount to add to the metric
 */
public void incrTotalCommandsReceived(long delta) {
  totalCommandsReceived.incr(delta);
}

/**
 * Raises the {@code totalCommandsDiscarded} metric by {@code delta}.
 *
 * @param delta amount to add to the metric
 */
public void incrTotalCommandsDiscarded(long delta) {
  totalCommandsDiscarded.incr(delta);
}

/**
 * Raises the {@code totalTransactionsDiscarded} metric by {@code delta}.
 *
 * @param delta amount to add to the metric
 */
public void incrTotalTransactionsDiscarded(long delta) {
  totalTransactionsDiscarded.incr(delta);
}

/**
 * Reads the {@code successCount} metric.
 *
 * @return the value currently held by {@code successCount}
 */
public long getSuccessCount() {
  return this.successCount.value();
}
Expand Down Expand Up @@ -184,6 +212,26 @@ public long getTotalLockTimeoutTransactionCount() {
return totalLockTimeoutTransactionCount.value();
}

/**
 * Reads the {@code processedTransactionCount} metric.
 *
 * @return the value currently held by {@code processedTransactionCount}
 */
public long getProcessedTransactionCount() {
  return this.processedTransactionCount.value();
}

/**
 * Reads the {@code receivedTransactionCount} metric.
 *
 * @return the value currently held by {@code receivedTransactionCount}
 */
public long getReceivedTransactionCount() {
  return this.receivedTransactionCount.value();
}

/**
 * Reads the {@code totalCommandsReceived} metric.
 *
 * @return the value currently held by {@code totalCommandsReceived}
 */
public long getTotalCommandsReceived() {
  return this.totalCommandsReceived.value();
}

/**
 * Reads the {@code totalCommandsDiscarded} metric.
 *
 * @return the value currently held by {@code totalCommandsDiscarded}
 */
public long getTotalCommandsDiscarded() {
  return this.totalCommandsDiscarded.value();
}

/**
 * Reads the {@code totalTransactionsDiscarded} metric.
 *
 * @return the value currently held by {@code totalTransactionsDiscarded}
 */
public long getTotalTransactionsDiscarded() {
  return this.totalTransactionsDiscarded.value();
}

@Override
public String toString() {
StringBuffer buffer = new StringBuffer();
Expand All @@ -202,14 +250,22 @@ public String toString() {
+ receivedTransactionCount.value()).append("\t")
.append("receivedRetryTransactionCount = "
+ receivedRetryTransactionCount.value()).append("\t")
.append("processedTransactionCount = "
+ processedTransactionCount.value()).append("\t")
.append("receivedContainerCount = "
+ receivedContainerCount.value()).append("\t")
.append("receivedBlockCount = "
+ receivedBlockCount.value()).append("\t")
.append("markedBlockCount = "
+ markedBlockCount.value()).append("\t")
.append("totalLockTimeoutTransactionCount = "
+ totalLockTimeoutTransactionCount.value()).append("\t");
+ totalLockTimeoutTransactionCount.value()).append("\t")
.append("totalCommandsReceived = "
+ totalCommandsReceived.value()).append("\t")
.append("totalCommandsDiscarded = "
+ totalCommandsDiscarded.value()).append("\t")
.append("totalTransactionsDiscarded = "
+ totalTransactionsDiscarded.value()).append("\t");
return buffer.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ public void handle(SCMCommand command, OzoneContainer container,
container, context, connectionManager);
try {
deleteCommandQueues.add(cmd);
blockDeleteMetrics.incrTotalCommandsReceived(1);
Tejaskriya marked this conversation as resolved.
Show resolved Hide resolved
} catch (IllegalStateException e) {
String dnId = context.getParent().getDatanodeDetails().getUuidString();
Consumer<CommandStatus> updateFailure = (cmdStatus) -> {
Expand All @@ -157,6 +158,7 @@ public void handle(SCMCommand command, OzoneContainer container,
};
updateCommandStatus(cmd.getContext(), cmd.getCmd(), updateFailure, LOG);
LOG.warn("Command is discarded because of the command queue is full");
blockDeleteMetrics.incrTotalCommandsDiscarded(1);
Tejaskriya marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down Expand Up @@ -462,6 +464,7 @@ public List<Future<DeleteBlockTransactionExecutionResult>> submitTasks(
Future<DeleteBlockTransactionExecutionResult> future =
executor.submit(new ProcessTransactionTask(tx));
futures.add(future);
blockDeleteMetrics.incrProcessedTransactionCount(1);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

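This only submits the task to the executor and gets back a future; it does not tell us whether the transaction was actually processed, so incrementing the metric here is not correct.
We can update this metric in processCmd() instead, when the result is recorded as success/failure.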

}
return futures;
}
Expand Down Expand Up @@ -650,6 +653,7 @@ public static boolean isDuplicateTransaction(long containerId, KeyValueContainer
containerData.getDeleteTransactionId()));
} else if (delTX.getTxID() == containerData.getDeleteTransactionId()) {
duplicate = true;
metrics.incrTotalTransactionsDiscarded(1);
Tejaskriya marked this conversation as resolved.
Show resolved Hide resolved
LOG.info(String.format("Delete blocks with txID %d for containerId: %d"
+ " is retried.", delTX.getTxID(), containerId));
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,9 @@ public void testDeleteBlockCommandHandleWhenDeleteCommandQueuesFull()
assertEquals(cmdStatus.getProtoBufMessage().getBlockDeletionAck().getResultsCount(), 0);
}
}
blockDeleteMetrics = handler.getBlockDeleteMetrics();
assertEquals(5, blockDeleteMetrics.getTotalCommandsReceived());
assertEquals(2, blockDeleteMetrics.getTotalCommandsDiscarded());
}

@ContainerTestVersionInfo.ContainerTest
Expand Down Expand Up @@ -367,6 +370,7 @@ public void testDuplicateDeleteBlocksCommand(
assertTrue(results3.get(0).getSuccess());
assertEquals(0,
blockDeleteMetrics.getTotalLockTimeoutTransactionCount());
assertEquals(1, blockDeleteMetrics.getTotalTransactionsDiscarded());
// Duplicate cmd content will not be persisted.
assertEquals(2,
((KeyValueContainerData) container.getContainerData()).getNumPendingDeletionBlocks());
Expand Down
Loading