From c872f19ac68f9116520109f7cccc0bf351dcb1fc Mon Sep 17 00:00:00 2001 From: Boris Zbarsky Date: Fri, 27 Oct 2023 13:34:36 -0400 Subject: [PATCH] Handle errors in sending messages during commissioning. The commissioning state machine, when handling a state, needs to either queue up an async process that will eventually push the state machine along or needs to fail out and stop the commissioning process. We had a number of state handlers that could fail to do both if the attempt to send the message failed, which would leave the commissioner in a stuck state where it thought it was waiting for the async work, while the async work was not queued. This change adds the relevant error checks to ensure we never wait for work that has not started. Fixes https://github.com/project-chip/connectedhomeip/issues/30041 --- src/controller/CHIPDeviceController.cpp | 216 +++++++++++++++++++----- 1 file changed, 173 insertions(+), 43 deletions(-) diff --git a/src/controller/CHIPDeviceController.cpp b/src/controller/CHIPDeviceController.cpp index 87301129dbe743..135812979d16aa 100644 --- a/src/controller/CHIPDeviceController.cpp +++ b/src/controller/CHIPDeviceController.cpp @@ -1018,8 +1018,6 @@ CHIP_ERROR DeviceCommissioner::SendCertificateChainRequestCommand(DeviceProxy * OperationalCredentials::Commands::CertificateChainRequest::Type request; request.certificateType = static_cast(certificateType); return SendCommand(device, request, OnCertificateChainResponse, OnCertificateChainFailureResponse, timeout); - - return CHIP_NO_ERROR; } void DeviceCommissioner::OnCertificateChainFailureResponse(void * context, CHIP_ERROR error) @@ -1651,8 +1649,16 @@ void DeviceCommissioner::CleanupCommissioning(DeviceProxy * proxy, NodeId nodeId ChipLogProgress(Controller, "Expiring failsafe on proxy %p", proxy); mDeviceBeingCommissioned = proxy; // We actually want to do the same thing on success or failure because we're already in a failure state - SendCommand(proxy, request, OnDisarmFailsafe, OnDisarmFailsafeFailure, - /* timeout = */ NullOptional); + CHIP_ERROR err = SendCommand(proxy, request, OnDisarmFailsafe, OnDisarmFailsafeFailure, + /* timeout = */ NullOptional); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just pretend like the + // command errored out async. + ChipLogError(Controller, "Failed to send command to disarm fail-safe: %" CHIP_ERROR_FORMAT, err.Format()); + DisarmDone(); + return; + } } } @@ -2391,7 +2397,7 @@ void DeviceCommissioner::SendCommissioningReadRequest(DeviceProxy * proxy, Optio CHIP_ERROR err = readClient->SendRequest(readParams); if (err != CHIP_NO_ERROR) { - ChipLogError(Controller, "Failed to send read request for networking clusters"); + ChipLogError(Controller, "Failed to send read request: %" CHIP_ERROR_FORMAT, err.Format()); CommissioningStageComplete(err); return; } @@ -2435,7 +2441,7 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio } break; case CommissioningStage::kReadCommissioningInfo: { - ChipLogProgress(Controller, "Sending request for commissioning information"); + ChipLogProgress(Controller, "Sending read request for commissioning information"); // NOTE: this array cannot have more than 9 entries, since the spec mandates that server only needs to support 9 app::AttributePathParams readPaths[9]; // Read all the feature maps for all the networking clusters on any endpoint to determine what is supported @@ -2466,8 +2472,6 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio } break; case CommissioningStage::kReadCommissioningInfo2: { - ChipLogProgress(Controller, "Sending request for commissioning information -- Part 2"); - size_t numberOfAttributes = 0; // This is done in a separate step since we've already used up all the available read paths in the previous read step app::AttributePathParams readPaths[9]; @@ -2493,28 +2497,34 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio // We don't actually want to do this step, so just bypass it ChipLogProgress(Controller, "kReadCommissioningInfo2 step called without parameter set, skipping"); CommissioningStageComplete(CHIP_NO_ERROR); + return; } - else - { - SendCommissioningReadRequest(proxy, timeout, readPaths, numberOfAttributes); - } + + ChipLogProgress(Controller, "Sending request for commissioning information -- Part 2"); + SendCommissioningReadRequest(proxy, timeout, readPaths, numberOfAttributes); } break; case CommissioningStage::kConfigureUTCTime: { TimeSynchronization::Commands::SetUTCTime::Type request; uint64_t kChipEpochUsSinceUnixEpoch = static_cast(kChipEpochSecondsSinceUnixEpoch) * chip::kMicrosecondsPerSecond; System::Clock::Microseconds64 utcTime; - if (System::SystemClock().GetClock_RealTime(utcTime) == CHIP_NO_ERROR && utcTime.count() > kChipEpochUsSinceUnixEpoch) - { - request.UTCTime = utcTime.count() - kChipEpochUsSinceUnixEpoch; - // For now, we assume a seconds granularity - request.granularity = TimeSynchronization::GranularityEnum::kSecondsGranularity; - SendCommand(proxy, request, OnBasicSuccess, OnSetUTCError, endpoint, timeout); - } - else + if (System::SystemClock().GetClock_RealTime(utcTime) != CHIP_NO_ERROR || utcTime.count() <= kChipEpochUsSinceUnixEpoch) { // We have no time to give, but that's OK, just complete this stage CommissioningStageComplete(CHIP_NO_ERROR); + return; + } + + request.UTCTime = utcTime.count() - kChipEpochUsSinceUnixEpoch; + // For now, we assume a seconds granularity + request.granularity = TimeSynchronization::GranularityEnum::kSecondsGranularity; + CHIP_ERROR err = SendCommand(proxy, request, OnBasicSuccess, OnSetUTCError, endpoint, timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send SetUTCTime command: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; } break; } @@ -2527,7 +2537,14 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio } TimeSynchronization::Commands::SetTimeZone::Type request; request.timeZone = params.GetTimeZone().Value(); - SendCommand(proxy, request, OnSetTimeZoneResponse, OnBasicFailure, endpoint, timeout); + CHIP_ERROR err = SendCommand(proxy, request, OnSetTimeZoneResponse, OnBasicFailure, endpoint, timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send SetTimeZone command: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; + } break; } case CommissioningStage::kConfigureDSTOffset: { @@ -2539,7 +2556,14 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio } TimeSynchronization::Commands::SetDSTOffset::Type request; request.DSTOffset = params.GetDSTOffsets().Value(); - SendCommand(proxy, request, OnBasicSuccess, OnBasicFailure, endpoint, timeout); + CHIP_ERROR err = SendCommand(proxy, request, OnBasicSuccess, OnBasicFailure, endpoint, timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send SetDSTOffset command: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; + } break; } case CommissioningStage::kConfigureDefaultNTP: { @@ -2551,7 +2575,14 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio } TimeSynchronization::Commands::SetDefaultNTP::Type request; request.defaultNTP = params.GetDefaultNTP().Value(); - SendCommand(proxy, request, OnBasicSuccess, OnBasicFailure, endpoint, timeout); + CHIP_ERROR err = SendCommand(proxy, request, OnBasicSuccess, OnBasicFailure, endpoint, timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send SetDefaultNTP command: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; + } break; } case CommissioningStage::kScanNetworks: { @@ -2561,7 +2592,14 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio request.ssid.Emplace(params.GetWiFiCredentials().Value().ssid); } request.breadcrumb.Emplace(breadcrumb); - SendCommand(proxy, request, OnScanNetworksResponse, OnScanNetworksFailure, endpoint, timeout); + CHIP_ERROR err = SendCommand(proxy, request, OnScanNetworksResponse, OnScanNetworksFailure, endpoint, timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send ScanNetworks command: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; + } break; } case CommissioningStage::kNeedsNetworkCreds: { @@ -2621,18 +2659,43 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio request.newRegulatoryConfig = regulatoryConfig; request.countryCode = countryCode; request.breadcrumb = breadcrumb; - SendCommand(proxy, request, OnSetRegulatoryConfigResponse, OnBasicFailure, endpoint, timeout); + CHIP_ERROR err = SendCommand(proxy, request, OnSetRegulatoryConfigResponse, OnBasicFailure, endpoint, timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send SetRegulatoryConfig command: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; + } } break; - case CommissioningStage::kSendPAICertificateRequest: + case CommissioningStage::kSendPAICertificateRequest: { ChipLogProgress(Controller, "Sending request for PAI certificate"); - SendCertificateChainRequestCommand(proxy, CertificateType::kPAI, timeout); + CHIP_ERROR err = SendCertificateChainRequestCommand(proxy, CertificateType::kPAI, timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send CertificateChainRequest command to get PAI: %" CHIP_ERROR_FORMAT, + err.Format()); + CommissioningStageComplete(err); + return; + } break; - case CommissioningStage::kSendDACCertificateRequest: + } + case CommissioningStage::kSendDACCertificateRequest: { ChipLogProgress(Controller, "Sending request for DAC certificate"); - SendCertificateChainRequestCommand(proxy, CertificateType::kDAC, timeout); + CHIP_ERROR err = SendCertificateChainRequestCommand(proxy, CertificateType::kDAC, timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send CertificateChainRequest command to get DAC: %" CHIP_ERROR_FORMAT, + err.Format()); + CommissioningStageComplete(err); + return; + } break; - case CommissioningStage::kSendAttestationRequest: + } + case CommissioningStage::kSendAttestationRequest: { ChipLogProgress(Controller, "Sending Attestation Request to the device."); if (!params.GetAttestationNonce().HasValue()) { @@ -2640,8 +2703,16 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio CommissioningStageComplete(CHIP_ERROR_INVALID_ARGUMENT); return; } - SendAttestationRequestCommand(proxy, params.GetAttestationNonce().Value(), timeout); + CHIP_ERROR err = SendAttestationRequestCommand(proxy, params.GetAttestationNonce().Value(), timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send AttestationRequest command: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; + } break; + } case CommissioningStage::kAttestationVerification: { ChipLogProgress(Controller, "Verifying attestation"); if (!params.GetAttestationElements().HasValue() || !params.GetAttestationSignature().HasValue() || @@ -2667,15 +2738,23 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio } } break; - case CommissioningStage::kSendOpCertSigningRequest: + case CommissioningStage::kSendOpCertSigningRequest: { if (!params.GetCSRNonce().HasValue()) { ChipLogError(Controller, "No CSR nonce found"); CommissioningStageComplete(CHIP_ERROR_INVALID_ARGUMENT); return; } - SendOperationalCertificateSigningRequestCommand(proxy, params.GetCSRNonce().Value(), timeout); + CHIP_ERROR err = SendOperationalCertificateSigningRequestCommand(proxy, params.GetCSRNonce().Value(), timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send CSR request: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; + } break; + } case CommissioningStage::kValidateCSR: { if (!params.GetNOCChainGenerationParameters().HasValue() || !params.GetDAC().HasValue() || !params.GetCSRNonce().HasValue()) { @@ -2691,6 +2770,7 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio ChipLogError(Controller, "Unable to validate CSR"); } CommissioningStageComplete(err); + return; } break; case CommissioningStage::kGenerateNOCChain: { @@ -2724,7 +2804,7 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio CHIP_ERROR err = SendTrustedRootCertificate(proxy, params.GetRootCert().Value(), timeout); if (err != CHIP_NO_ERROR) { - ChipLogError(Controller, "Error sending trusted root certificate: %s", err.AsString()); + ChipLogError(Controller, "Error sending trusted root certificate: %" CHIP_ERROR_FORMAT, err.Format()); CommissioningStageComplete(err); return; } @@ -2744,16 +2824,24 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio } } break; - case CommissioningStage::kSendNOC: + case CommissioningStage::kSendNOC: { if (!params.GetNoc().HasValue() || !params.GetIpk().HasValue() || !params.GetAdminSubject().HasValue()) { ChipLogError(Controller, "AddNOC contents not specified"); CommissioningStageComplete(CHIP_ERROR_INVALID_ARGUMENT); return; } - SendOperationalCertificate(proxy, params.GetNoc().Value(), params.GetIcac(), params.GetIpk().Value(), - params.GetAdminSubject().Value(), timeout); + CHIP_ERROR err = SendOperationalCertificate(proxy, params.GetNoc().Value(), params.GetIcac(), params.GetIpk().Value(), + params.GetAdminSubject().Value(), timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Error sending operational certificate: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; + } break; + } case CommissioningStage::kConfigureTrustedTimeSource: { if (!params.GetTrustedTimeSource().HasValue()) { @@ -2763,7 +2851,14 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio } TimeSynchronization::Commands::SetTrustedTimeSource::Type request; request.trustedTimeSource = params.GetTrustedTimeSource().Value(); - SendCommand(proxy, request, OnBasicSuccess, OnBasicFailure, endpoint, timeout); + CHIP_ERROR err = SendCommand(proxy, request, OnBasicSuccess, OnBasicFailure, endpoint, timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send SendTrustedTimeSource command: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; + } break; } case CommissioningStage::kWiFiNetworkSetup: { @@ -2778,7 +2873,14 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio request.ssid = params.GetWiFiCredentials().Value().ssid; request.credentials = params.GetWiFiCredentials().Value().credentials; request.breadcrumb.Emplace(breadcrumb); - SendCommand(proxy, request, OnNetworkConfigResponse, OnBasicFailure, endpoint, timeout); + CHIP_ERROR err = SendCommand(proxy, request, OnNetworkConfigResponse, OnBasicFailure, endpoint, timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send AddOrUpdateWiFiNetwork command: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; + } } break; case CommissioningStage::kThreadNetworkSetup: { @@ -2791,7 +2893,14 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio NetworkCommissioning::Commands::AddOrUpdateThreadNetwork::Type request; request.operationalDataset = params.GetThreadOperationalDataset().Value(); request.breadcrumb.Emplace(breadcrumb); - SendCommand(proxy, request, OnNetworkConfigResponse, OnBasicFailure, endpoint, timeout); + CHIP_ERROR err = SendCommand(proxy, request, OnNetworkConfigResponse, OnBasicFailure, endpoint, timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send AddOrUpdateThreadNetwork command: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; + } } break; case CommissioningStage::kFailsafeBeforeWiFiEnable: @@ -2812,7 +2921,14 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio NetworkCommissioning::Commands::ConnectNetwork::Type request; request.networkID = params.GetWiFiCredentials().Value().ssid; request.breadcrumb.Emplace(breadcrumb); - SendCommand(proxy, request, OnConnectNetworkResponse, OnBasicFailure, endpoint, timeout); + CHIP_ERROR err = SendCommand(proxy, request, OnConnectNetworkResponse, OnBasicFailure, endpoint, timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send WiFi ConnectNetwork command: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; + } } break; case CommissioningStage::kThreadNetworkEnable: { @@ -2829,7 +2945,14 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio NetworkCommissioning::Commands::ConnectNetwork::Type request; request.networkID = extendedPanId; request.breadcrumb.Emplace(breadcrumb); - SendCommand(proxy, request, OnConnectNetworkResponse, OnBasicFailure, endpoint, timeout); + CHIP_ERROR err = SendCommand(proxy, request, OnConnectNetworkResponse, OnBasicFailure, endpoint, timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send Thread ConnectNetwork command: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; + } } break; case CommissioningStage::kFindOperational: { @@ -2855,7 +2978,14 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio break; case CommissioningStage::kSendComplete: { GeneralCommissioning::Commands::CommissioningComplete::Type request; - SendCommand(proxy, request, OnCommissioningCompleteResponse, OnBasicFailure, endpoint, timeout); + CHIP_ERROR err = SendCommand(proxy, request, OnCommissioningCompleteResponse, OnBasicFailure, endpoint, timeout); + if (err != CHIP_NO_ERROR) + { + // We won't get any async callbacks here, so just complete our stage. + ChipLogError(Controller, "Failed to send CommissioningComplete command: %" CHIP_ERROR_FORMAT, err.Format()); + CommissioningStageComplete(err); + return; + } } break; case CommissioningStage::kCleanup: