Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix lifetime of Darwin SubscriptionCallback to avoid shutdown crashes. #22324

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 31 additions & 21 deletions src/darwin/Framework/CHIP/MTRBaseDevice.mm
Original file line number Diff line number Diff line change
Expand Up @@ -331,9 +331,13 @@ - (void)invalidateCASESession
void OnSubscriptionEstablished(SubscriptionId aSubscriptionId) override;

void ReportData();
void ReportError(CHIP_ERROR err);
void ReportError(const StatusIB & status);
void ReportError(NSError * _Nullable err);

// Report an error, which may be due to issues in our own internal state or
// due to the OnError callback happening.
//
// aCancelSubscription should be false for the OnError case, since it will
// be immediately followed by OnDone and we want to do the deletion there.
void ReportError(CHIP_ERROR aError, bool aCancelSubscription = true);

private:
dispatch_queue_t mQueue;
Expand All @@ -347,11 +351,13 @@ - (void)invalidateCASESession
NSMutableArray * _Nullable mAttributeReports = nil;
NSMutableArray * _Nullable mEventReports = nil;

// Our lifetime management is a little complicated. On error we
// attempt to delete the ReadClient, but asynchronously. While
// that's pending, someone else (e.g. an error it runs into) could
// delete it too. And if someone else does attempt to delete it, we want to
// make sure we delete ourselves as well.
// Our lifetime management is a little complicated. On errors that don't
// originate with the ReadClient we attempt to delete ourselves (and hence
// the ReadClient), but asynchronously, because the ReadClient API doesn't
// allow sync deletion under callbacks other than OnDone. While that's
// pending, something else (e.g. an error it runs into) could end up calling
// OnDone on us. And generally if OnDone is called we want to delete
// ourselves as well.
//
// To handle this, enforce the following rules:
//
Expand Down Expand Up @@ -1604,7 +1610,7 @@ - (instancetype)initWithPath:(const ConcreteEventPath &)path
{
// If OnError is called after OnReportBegin, we should report the collected data
ReportData();
ReportError([MTRError errorForCHIPErrorCode:aError]);
ReportError(aError, /* aCancelSubscription = */ false);
}

void SubscriptionCallback::OnDone(ReadClient *)
Expand Down Expand Up @@ -1647,14 +1653,11 @@ - (instancetype)initWithPath:(const ConcreteEventPath &)path
}
}

void SubscriptionCallback::ReportError(CHIP_ERROR err) { ReportError([MTRError errorForCHIPErrorCode:err]); }

void SubscriptionCallback::ReportError(const StatusIB & status) { ReportError([MTRError errorForIMStatus:status]); }

void SubscriptionCallback::ReportError(NSError * _Nullable err)
void SubscriptionCallback::ReportError(CHIP_ERROR aError, bool aCancelSubscription)
{
auto * err = [MTRError errorForCHIPErrorCode:aError];
if (!err) {
// Very strange... Someone tried to create a MTRError for a success status?
// Very strange... Someone tried to report a success status as an error?
return;
}

Expand All @@ -1675,15 +1678,22 @@ - (instancetype)initWithPath:(const ConcreteEventPath &)path
if (onDoneHandler) {
onDoneHandler();
}
});

// Deletion of our ReadClient (and hence of ourselves, since the
// ReadClient has a pointer to us) needs to happen on the Matter work
// queue.
if (aCancelSubscription) {
// We can't synchronously delete ourselves, because we're inside one of
// the ReadClient callbacks and we need to outlive the callback's
// execution. Queue an async deletion on the Matter queue (where we are
// running already).
//
// If we now get OnDone, we will ignore that, since we have the deletion
// posted already, but that's OK even during shutdown: since we are
// queueing the deletion now, it will be processed before the Matter queue
// gets paused, which is fairly early in the shutdown process.
mHaveQueuedDeletion = true;
dispatch_async(DeviceLayer::PlatformMgrImpl().GetWorkQueue(), ^{
delete myself;
});
});

mHaveQueuedDeletion = true;
}
}
} // anonymous namespace
50 changes: 30 additions & 20 deletions src/darwin/Framework/CHIP/MTRDevice.mm
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,13 @@ - (id)strongObject
CHIP_ERROR OnResubscriptionNeeded(ReadClient * apReadClient, CHIP_ERROR aTerminationCause) override;

void ReportData();
void ReportError(CHIP_ERROR err);
void ReportError(const StatusIB & status);
void ReportError(NSError * _Nullable err);

// Report an error, which may be due to issues in our own internal state or
// due to the OnError callback happening.
//
// aCancelSubscription should be false for the OnError case, since it will
// be immediately followed by OnDone and we want to do the deletion there.
void ReportError(CHIP_ERROR aError, bool aCancelSubscription = true);

private:
dispatch_queue_t mQueue;
Expand All @@ -168,11 +172,13 @@ - (id)strongObject
NSMutableArray * _Nullable mAttributeReports = nil;
NSMutableArray * _Nullable mEventReports = nil;

// Our lifetime management is a little complicated. On error we
// attempt to delete the ReadClient, but asynchronously. While
// that's pending, someone else (e.g. an error it runs into) could
// delete it too. And if someone else does attempt to delete it, we want to
// make sure we delete ourselves as well.
// Our lifetime management is a little complicated. On errors that don't
// originate with the ReadClient we attempt to delete ourselves (and hence
// the ReadClient), but asynchronously, because the ReadClient API doesn't
// allow sync deletion under callbacks other than OnDone. While that's
// pending, something else (e.g. an error it runs into) could end up calling
// OnDone on us. And generally if OnDone is called we want to delete
// ourselves as well.
//
// To handle this, enforce the following rules:
//
Expand Down Expand Up @@ -835,7 +841,7 @@ - (void)setExpectedValues:(NSArray<NSDictionary<NSString *, id> *> *)values expe
{
// If OnError is called after OnReportBegin, we should report the collected data
ReportData();
ReportError([MTRError errorForCHIPErrorCode:aError]);
ReportError(aError, /* aCancelSubscription = */ false);
}

void SubscriptionCallback::OnDone(ReadClient *)
Expand Down Expand Up @@ -883,12 +889,9 @@ - (void)setExpectedValues:(NSArray<NSDictionary<NSString *, id> *> *)values expe
return apReadClient->DefaultResubscribePolicy(aTerminationCause);
}

void SubscriptionCallback::ReportError(CHIP_ERROR err) { ReportError([MTRError errorForCHIPErrorCode:err]); }

void SubscriptionCallback::ReportError(const StatusIB & status) { ReportError([MTRError errorForIMStatus:status]); }

void SubscriptionCallback::ReportError(NSError * _Nullable err)
void SubscriptionCallback::ReportError(CHIP_ERROR aError, bool aCancelSubscription)
{
auto * err = [MTRError errorForCHIPErrorCode:aError];
if (!err) {
// Very strange... Someone tried to create a MTRError for a success status?
return;
Expand All @@ -911,15 +914,22 @@ - (void)setExpectedValues:(NSArray<NSDictionary<NSString *, id> *> *)values expe
if (onDoneHandler) {
onDoneHandler();
}
});

// Deletion of our ReadClient (and hence of ourselves, since the
// ReadClient has a pointer to us) needs to happen on the Matter work
// queue.
if (aCancelSubscription) {
// We can't synchronously delete ourselves, because we're inside one of
// the ReadClient callbacks and we need to outlive the callback's
// execution. Queue an async deletion on the Matter queue (where we are
// running already).
//
// If we now get OnDone, we will ignore that, since we have the deletion
// posted already, but that's OK even during shutdown: since we are
// queueing the deletion now, it will be processed before the Matter queue
// gets paused, which is fairly early in the shutdown process.
mHaveQueuedDeletion = true;
dispatch_async(DeviceLayer::PlatformMgrImpl().GetWorkQueue(), ^{
delete myself;
});
});

mHaveQueuedDeletion = true;
}
}
} // anonymous namespace