From 928cb45091a5ffd1ba4e14842d1d3146df012642 Mon Sep 17 00:00:00 2001 From: Anush Nadathur Date: Tue, 18 Jun 2024 18:52:50 -0700 Subject: [PATCH] [Metrics] Added additional metrics for CASE and Subscription (#33934) * [Metrics] Added additional metrics for CASE and Subscription - CASE session tracking - Subscription Setup - MRP Retries * Fixed code review comment * Restyler fixes * Update src/darwin/Framework/CHIP/MTRDevice.mm Co-authored-by: Boris Zbarsky * Addressed code review comments * Update src/darwin/Framework/CHIP/MTRMetricKeys.h Co-authored-by: Boris Zbarsky * Fixed build failure when tracing is off * Resytle fixes --------- Co-authored-by: Boris Zbarsky --- src/app/ReadClient.cpp | 17 ++++- src/darwin/Framework/CHIP/MTRDevice.mm | 6 ++ src/darwin/Framework/CHIP/MTRMetricKeys.h | 4 ++ src/messaging/ReliableMessageMgr.cpp | 2 + src/protocols/secure_channel/CASESession.cpp | 75 ++++++++++++++++---- src/tracing/metric_keys.h | 24 +++++++ 6 files changed, 114 insertions(+), 14 deletions(-) diff --git a/src/app/ReadClient.cpp b/src/app/ReadClient.cpp index bb058e091858c7..78b0a56fdd7c26 100644 --- a/src/app/ReadClient.cpp +++ b/src/app/ReadClient.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -186,6 +187,11 @@ void ReadClient::Close(CHIP_ERROR aError, bool allowResubscription) } else { + if (IsAwaitingInitialReport() || IsAwaitingSubscribeResponse()) + { + MATTER_LOG_METRIC_END(Tracing::kMetricDeviceSubscriptionSetup, aError); + } + ClearActiveSubscriptionState(); if (aError != CHIP_NO_ERROR) { @@ -491,6 +497,7 @@ CHIP_ERROR ReadClient::OnMessageReceived(Messaging::ExchangeContext * apExchange ChipLogProgress(DataManagement, "SubscribeResponse is received"); VerifyOrExit(apExchangeContext == mExchange.Get(), err = CHIP_ERROR_INCORRECT_STATE); err = ProcessSubscribeResponse(std::move(aPayload)); + MATTER_LOG_METRIC_END(Tracing::kMetricDeviceSubscriptionSetup, err); } else if (aPayloadHeader.HasMessageType(Protocols::InteractionModel::MsgType::StatusResponse)) { @@ -684,6 +691,7 @@ void ReadClient::OnResponseTimeout(Messaging::ExchangeContext * apExchangeContex { ChipLogError(DataManagement, "Time out! failed to receive report data from Exchange: " ChipLogFormatExchange, ChipLogValueExchange(apExchangeContext)); + Close(CHIP_ERROR_TIMEOUT); } @@ -1096,11 +1104,18 @@ CHIP_ERROR ReadClient::SendSubscribeRequest(const ReadPrepareParams & aReadPrepa VerifyOrReturnError(aReadPrepareParams.mMinIntervalFloorSeconds <= aReadPrepareParams.mMaxIntervalCeilingSeconds, CHIP_ERROR_INVALID_ARGUMENT); - return SendSubscribeRequestImpl(aReadPrepareParams); + auto err = SendSubscribeRequestImpl(aReadPrepareParams); + if (CHIP_NO_ERROR != err) + { + MATTER_LOG_METRIC_END(Tracing::kMetricDeviceSubscriptionSetup, err); + } + return err; } CHIP_ERROR ReadClient::SendSubscribeRequestImpl(const ReadPrepareParams & aReadPrepareParams) { + MATTER_LOG_METRIC_BEGIN(Tracing::kMetricDeviceSubscriptionSetup); + VerifyOrReturnError(ClientState::Idle == mState, CHIP_ERROR_INCORRECT_STATE); if (&aReadPrepareParams != &mReadPrepareParams) diff --git a/src/darwin/Framework/CHIP/MTRDevice.mm b/src/darwin/Framework/CHIP/MTRDevice.mm index 9d1853c0e3d663..89db49f077cdca 100644 --- a/src/darwin/Framework/CHIP/MTRDevice.mm +++ b/src/darwin/Framework/CHIP/MTRDevice.mm @@ -160,6 +160,7 @@ - (void)_deviceInternalStateChanged:(MTRDevice *)device; using namespace chip; using namespace chip::app; using namespace chip::Protocols::InteractionModel; +using namespace chip::Tracing::DarwinFramework; typedef void (^FirstReportHandler)(void); @@ -1184,6 +1185,7 @@ - (void)_handleSubscriptionEstablished if (HadSubscriptionEstablishedOnce(_internalDeviceState)) { [self _changeInternalState:MTRInternalDeviceStateLaterSubscriptionEstablished]; } else { + MATTER_LOG_METRIC_END(kMetricMTRDeviceInitialSubscriptionSetup, CHIP_NO_ERROR); [self _changeInternalState:MTRInternalDeviceStateInitialSubscriptionEstablished]; } @@ -2340,6 +2342,10 @@ - (void)_setupSubscriptionWithReason:(NSString *)reason }); } + // This marks begin of initial subscription to the device (before CASE is established). The end is only marked after successfully setting + // up the subscription since it is always retried as long as the MTRDevice is kept running. + MATTER_LOG_METRIC_BEGIN(kMetricMTRDeviceInitialSubscriptionSetup); + // Call directlyGetSessionForNode because the subscription setup already goes through the subscription pool queue [_deviceController directlyGetSessionForNode:_nodeID.unsignedLongLongValue diff --git a/src/darwin/Framework/CHIP/MTRMetricKeys.h b/src/darwin/Framework/CHIP/MTRMetricKeys.h index 536da36b582053..d20ec3395656a4 100644 --- a/src/darwin/Framework/CHIP/MTRMetricKeys.h +++ b/src/darwin/Framework/CHIP/MTRMetricKeys.h @@ -16,6 +16,7 @@ #include #include +#include namespace chip { namespace Tracing { @@ -87,6 +88,9 @@ constexpr Tracing::MetricKey kMetricBLEDevicesRemoved = "dwnfw_ble_devices_remov // Unexpected C quality attribute update outside of priming constexpr Tracing::MetricKey kMetricUnexpectedCQualityUpdate = "dwnpm_bad_c_attr_update"; +// Setup from darwin MTRDevice for initial subscription to a device +constexpr Tracing::MetricKey kMetricMTRDeviceInitialSubscriptionSetup = "dwnpm_dev_initial_subscription_setup"; + } // namespace DarwinFramework } // namespace Tracing } // namespace chip diff --git a/src/messaging/ReliableMessageMgr.cpp b/src/messaging/ReliableMessageMgr.cpp index c9178a03d1822d..b7bd96f12e1e00 100644 --- a/src/messaging/ReliableMessageMgr.cpp +++ b/src/messaging/ReliableMessageMgr.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #if CHIP_CONFIG_ENABLE_ICD_SERVER #include // nogncheck @@ -170,6 +171,7 @@ void ReliableMessageMgr::ExecuteActions() "Retransmitting MessageCounter:" ChipLogFormatMessageCounter " on exchange " ChipLogFormatExchange " Send Cnt %d", messageCounter, ChipLogValueExchange(&entry->ec.Get()), entry->sendCount); + MATTER_LOG_METRIC(Tracing::kMetricDeviceRMPRetryCount, entry->sendCount); CalculateNextRetransTime(*entry); SendFromRetransTable(entry); diff --git a/src/protocols/secure_channel/CASESession.cpp b/src/protocols/secure_channel/CASESession.cpp index b0642a966890fd..b7aa87e9d5e278 100644 --- a/src/protocols/secure_channel/CASESession.cpp +++ b/src/protocols/secure_channel/CASESession.cpp @@ -47,6 +47,7 @@ #include #include #include +#include #include namespace { @@ -101,6 +102,7 @@ using namespace Credentials; using namespace Messaging; using namespace Encoding; using namespace Protocols::SecureChannel; +using namespace Tracing; constexpr uint8_t kKDFSR2Info[] = { 0x53, 0x69, 0x67, 0x6d, 0x61, 0x32 }; constexpr uint8_t kKDFSR3Info[] = { 0x53, 0x69, 0x67, 0x6d, 0x61, 0x33 }; @@ -521,14 +523,15 @@ CHIP_ERROR CASESession::EstablishSession(SessionManager & sessionManager, Fabric CHIP_ERROR err = CHIP_NO_ERROR; // Return early on error here, as we have not initialized any state yet - ReturnErrorCodeIf(exchangeCtxt == nullptr, CHIP_ERROR_INVALID_ARGUMENT); - ReturnErrorCodeIf(fabricTable == nullptr, CHIP_ERROR_INVALID_ARGUMENT); + ReturnErrorCodeWithMetricIf(kMetricDeviceCASESession, exchangeCtxt == nullptr, CHIP_ERROR_INVALID_ARGUMENT); + ReturnErrorCodeWithMetricIf(kMetricDeviceCASESession, fabricTable == nullptr, CHIP_ERROR_INVALID_ARGUMENT); // Use FabricTable directly to avoid situation of dangling index from stale FabricInfo // until we factor-out any FabricInfo direct usage. - ReturnErrorCodeIf(peerScopedNodeId.GetFabricIndex() == kUndefinedFabricIndex, CHIP_ERROR_INVALID_ARGUMENT); + ReturnErrorCodeWithMetricIf(kMetricDeviceCASESession, peerScopedNodeId.GetFabricIndex() == kUndefinedFabricIndex, + CHIP_ERROR_INVALID_ARGUMENT); const auto * fabricInfo = fabricTable->FindFabricWithIndex(peerScopedNodeId.GetFabricIndex()); - ReturnErrorCodeIf(fabricInfo == nullptr, CHIP_ERROR_INVALID_ARGUMENT); + ReturnErrorCodeWithMetricIf(kMetricDeviceCASESession, fabricInfo == nullptr, CHIP_ERROR_INVALID_ARGUMENT); err = Init(sessionManager, policy, delegate, peerScopedNodeId); @@ -542,9 +545,11 @@ CHIP_ERROR CASESession::EstablishSession(SessionManager & sessionManager, Fabric // From here onwards, let's go to exit on error, as some state might have already // been initialized - SuccessOrExit(err); + SuccessOrExitWithMetric(kMetricDeviceCASESession, err); - SuccessOrExit(err = fabricTable->AddFabricDelegate(this)); + SuccessOrExitWithMetric(kMetricDeviceCASESession, err = fabricTable->AddFabricDelegate(this)); + + MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESession); // Set the PeerAddress in the secure session up front to indicate the // Transport Type of the session that is being set up. @@ -571,6 +576,7 @@ CHIP_ERROR CASESession::EstablishSession(SessionManager & sessionManager, Fabric } else { + MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESessionSigma1); err = SendSigma1(); SuccessOrExit(err); } @@ -578,6 +584,8 @@ CHIP_ERROR CASESession::EstablishSession(SessionManager & sessionManager, Fabric exit: if (err != CHIP_NO_ERROR) { + MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err); + MATTER_LOG_METRIC_END(kMetricDeviceCASESession, err); Clear(); } return err; @@ -601,6 +609,7 @@ void CASESession::OnResponseTimeout(ExchangeContext * ec) void CASESession::AbortPendingEstablish(CHIP_ERROR err) { + MATTER_LOG_METRIC_END(kMetricDeviceCASESession, err); MATTER_TRACE_SCOPE("AbortPendingEstablish", "CASESession"); // This needs to come before Clear() which will reset mState. SessionEstablishmentStage state = MapCASEStateToSessionEstablishmentStage(mState); @@ -851,7 +860,17 @@ CHIP_ERROR CASESession::SendSigma1() ReturnErrorOnFailure(mExchangeCtxt.Value()->SendMessage(Protocols::SecureChannel::MsgType::CASE_Sigma1, std::move(msg_R1), SendFlags(SendMessageFlags::kExpectResponse))); - mState = resuming ? State::kSentSigma1Resume : State::kSentSigma1; + if (resuming) + { + mState = State::kSentSigma1Resume; + + // Flags that Resume is being attempted + MATTER_LOG_METRIC(kMetricDeviceCASESessionSigma1Resume); + } + else + { + mState = State::kSentSigma1; + } ChipLogProgress(SecureChannel, "Sent Sigma1 msg"); @@ -984,7 +1003,13 @@ CHIP_ERROR CASESession::HandleSigma1(System::PacketBufferHandle && msg) std::copy(resumptionId.begin(), resumptionId.end(), mResumeResumptionId.begin()); // Send Sigma2Resume message to the initiator - SuccessOrExit(err = SendSigma2Resume()); + MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESessionSigma2Resume); + err = SendSigma2Resume(); + if (CHIP_NO_ERROR != err) + { + MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma2Resume, err); + } + SuccessOrExit(err); mDelegate->OnSessionEstablishmentStarted(); @@ -1013,7 +1038,13 @@ CHIP_ERROR CASESession::HandleSigma1(System::PacketBufferHandle && msg) // mRemotePubKey.Length() == initiatorPubKey.size() == kP256_PublicKey_Length. memcpy(mRemotePubKey.Bytes(), initiatorPubKey.data(), mRemotePubKey.Length()); - SuccessOrExit(err = SendSigma2()); + MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESessionSigma2); + err = SendSigma2(); + if (CHIP_NO_ERROR != err) + { + MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma2, err); + } + SuccessOrExit(err); mDelegate->OnSessionEstablishmentStarted(); @@ -1236,6 +1267,7 @@ CHIP_ERROR CASESession::HandleSigma2Resume(System::PacketBufferHandle && msg) ChipLogDetail(SecureChannel, "Received Sigma2Resume msg"); MATTER_TRACE_COUNTER("Sigma2Resume"); + MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err); uint8_t sigma2ResumeMIC[CHIP_CRYPTO_AEAD_MIC_LENGTH_BYTES]; @@ -1278,6 +1310,7 @@ CHIP_ERROR CASESession::HandleSigma2Resume(System::PacketBufferHandle && msg) ChipLogError(SecureChannel, "Unable to save session resumption state: %" CHIP_ERROR_FORMAT, err2.Format()); } + MATTER_LOG_METRIC(kMetricDeviceCASESessionSigmaFinished); SendStatusReport(mExchangeCtxt, kProtocolCodeSuccess); mState = State::kFinishedViaResume; @@ -1294,10 +1327,17 @@ CHIP_ERROR CASESession::HandleSigma2Resume(System::PacketBufferHandle && msg) CHIP_ERROR CASESession::HandleSigma2_and_SendSigma3(System::PacketBufferHandle && msg) { MATTER_TRACE_SCOPE("HandleSigma2_and_SendSigma3", "CASESession"); - ReturnErrorOnFailure(HandleSigma2(std::move(msg))); - ReturnErrorOnFailure(SendSigma3a()); + CHIP_ERROR err = HandleSigma2(std::move(msg)); + MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err); + ReturnErrorOnFailure(err); - return CHIP_NO_ERROR; + MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESessionSigma3); + err = SendSigma3a(); + if (CHIP_NO_ERROR != err) + { + MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma3, err); + } + return err; } CHIP_ERROR CASESession::HandleSigma2(System::PacketBufferHandle && msg) @@ -1708,6 +1748,7 @@ CHIP_ERROR CASESession::HandleSigma3a(System::PacketBufferHandle && msg) ChipLogProgress(SecureChannel, "Received Sigma3 msg"); MATTER_TRACE_COUNTER("Sigma3"); + MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma2, err); auto helper = WorkHelper::Create(*this, &HandleSigma3b, &CASESession::HandleSigma3c); VerifyOrExit(helper, err = CHIP_ERROR_NO_MEMORY); @@ -1888,6 +1929,7 @@ CHIP_ERROR CASESession::HandleSigma3c(HandleSigma3Data & data, CHIP_ERROR status } } + MATTER_LOG_METRIC(kMetricDeviceCASESessionSigmaFinished); SendStatusReport(mExchangeCtxt, kProtocolCodeSuccess); mState = State::kFinished; @@ -2288,6 +2330,7 @@ CHIP_ERROR CASESession::OnMessageReceived(ExchangeContext * ec, const PayloadHea case MsgType::StatusReport: err = HandleStatusReport(std::move(msg), /* successExpected*/ false); + MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err); break; default: @@ -2308,6 +2351,7 @@ CHIP_ERROR CASESession::OnMessageReceived(ExchangeContext * ec, const PayloadHea case MsgType::StatusReport: err = HandleStatusReport(std::move(msg), /* successExpected*/ false); + MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err); break; default: @@ -2324,6 +2368,7 @@ CHIP_ERROR CASESession::OnMessageReceived(ExchangeContext * ec, const PayloadHea case MsgType::StatusReport: err = HandleStatusReport(std::move(msg), /* successExpected*/ false); + MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma2, err); break; default: @@ -2335,7 +2380,11 @@ CHIP_ERROR CASESession::OnMessageReceived(ExchangeContext * ec, const PayloadHea case State::kSentSigma2Resume: if (msgType == Protocols::SecureChannel::MsgType::StatusReport) { - err = HandleStatusReport(std::move(msg), /* successExpected*/ true); + // Need to capture before invoking status report since 'this' might be deallocated on successful completion of sigma3 + MetricKey key = (mState == State::kSentSigma3) ? kMetricDeviceCASESessionSigma3 : kMetricDeviceCASESessionSigma2Resume; + err = HandleStatusReport(std::move(msg), /* successExpected*/ true); + MATTER_LOG_METRIC_END(key, err); + IgnoreUnusedVariable(key); } break; default: diff --git a/src/tracing/metric_keys.h b/src/tracing/metric_keys.h index 001e9c8036bd27..447d6c03dddc9c 100644 --- a/src/tracing/metric_keys.h +++ b/src/tracing/metric_keys.h @@ -54,5 +54,29 @@ constexpr MetricKey kMetricDeviceOperationalDiscoveryAttemptCount = "core_dev_op // CASE Session constexpr MetricKey kMetricDeviceCASESession = "core_dev_case_session"; +// CASE Session Sigma1 +constexpr MetricKey kMetricDeviceCASESessionSigma1 = "core_dev_case_session_sigma1"; + +// CASE Session Sigma1Resume +constexpr MetricKey kMetricDeviceCASESessionSigma1Resume = "core_dev_case_session_sigma1_resume"; + +// CASE Session Sigma2 +constexpr MetricKey kMetricDeviceCASESessionSigma2 = "core_dev_case_session_sigma2"; + +// CASE Session Sigma3 +constexpr MetricKey kMetricDeviceCASESessionSigma3 = "core_dev_case_session_sigma3"; + +// CASE Session Sigma2 Resume +constexpr MetricKey kMetricDeviceCASESessionSigma2Resume = "core_dev_case_session_sigma2_resume"; + +// CASE Session SigmaFinished +constexpr MetricKey kMetricDeviceCASESessionSigmaFinished = "core_dev_case_session_sigma_finished"; + +// MRP Retry Counter +constexpr MetricKey kMetricDeviceRMPRetryCount = "core_dev_rmp_retry_count"; + +// Subscription setup +constexpr MetricKey kMetricDeviceSubscriptionSetup = "core_dev_subscription_setup"; + } // namespace Tracing } // namespace chip