Skip to content

Commit

Permalink
[component_manager] Fix destroy/create race
Browse files Browse the repository at this point in the history
This fixes a bug in component manager that can cause a created component
to be incorrectly deleted if it was created just after another component
with the same name was deleted.

To be precise, the race is as follows:
1) Client calls DestroyChild.
2) component_manager schedules a DeleteChild action
3) DestroyChild returns.
4) Client calls CreateChild, with the same instance name.
5) Instance is created.
6) The DeleteChild action from #2 executes. It starts by scheduling a
   MarkDeleted action, which incorrectly marks the new instance deleted.

The solution is for DeleteChild not to call MarkDeleted. Instead, code
that handles instance destruction must explicitly call MarkDeleted
first.

This bug was causing `ffx session restart` to sometimes error with
CreateComponentFailed (which actually was an error of failing to bind to
the session component).

Tested: Manually verified the fix works with `session_control`. Added an
integration test which should flake if the issue is present. Unit test
checks that MarkDeleted is not rescheduled.

Multiply: destruction_integration_test: 200
Fixed: 41967
Change-Id: I1f19811b0afff0533a2721b45fe7cc07bde072ce
Reviewed-on: https://fuchsia-review.googlesource.com/c/fuchsia/+/511284
Commit-Queue: Gary Bressler <geb@google.com>
Reviewed-by: Fady Samuel <fsamuel@google.com>
  • Loading branch information
gebressler authored and CQ Bot committed Apr 15, 2021
1 parent d4118f9 commit 7a9ff48
Show file tree
Hide file tree
Showing 15 changed files with 345 additions and 80 deletions.
51 changes: 42 additions & 9 deletions src/sys/component_manager/src/builtin_environment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ use {
root_stop_notifier::RootStopNotifier,
work_scheduler::WorkScheduler,
},
anyhow::{format_err, Context as _, Error},
anyhow::{bail, format_err, Context as _, Error},
cm_rust::{CapabilityName, RunnerRegistration},
cm_types::Url,
fidl::endpoints::{create_endpoints, create_proxy, ServerEnd, ServiceMarker},
Expand All @@ -70,6 +70,7 @@ use {
futures::{channel::oneshot, prelude::*},
log::*,
std::{
default,
path::PathBuf,
sync::{Arc, Weak},
},
Expand All @@ -78,7 +79,6 @@ use {
// Allow shutdown to take up to an hour.
pub static SHUTDOWN_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60 * 60);

#[derive(Default)]
pub struct BuiltinEnvironmentBuilder {
// TODO(60804): Make component manager's namespace injectable here.
runtime_config: Option<RuntimeConfig>,
Expand All @@ -87,6 +87,21 @@ pub struct BuiltinEnvironmentBuilder {
utc_clock: Option<Arc<Clock>>,
add_environment_resolvers: bool,
inspector: Option<Inspector>,
enable_hub: bool,
}

impl default::Default for BuiltinEnvironmentBuilder {
fn default() -> Self {
Self {
runtime_config: None,
runners: vec![],
resolvers: ResolverRegistry::default(),
utc_clock: None,
add_environment_resolvers: false,
inspector: None,
enable_hub: true,
}
}
}

impl BuiltinEnvironmentBuilder {
Expand All @@ -108,6 +123,11 @@ impl BuiltinEnvironmentBuilder {
self
}

pub fn enable_hub(mut self, val: bool) -> Self {
self.enable_hub = val;
self
}

/// Create a UTC clock if required.
/// Not every instance of component_manager running on the system maintains a
/// UTC clock. Only the root component_manager should have the `maintain-utc-clock`
Expand Down Expand Up @@ -247,6 +267,7 @@ impl BuiltinEnvironmentBuilder {
builtin_runners,
self.utc_clock,
self.inspector.unwrap_or(component::inspector().clone()),
self.enable_hub,
)
.await?)
}
Expand Down Expand Up @@ -333,7 +354,7 @@ pub struct BuiltinEnvironment {
pub work_scheduler: Arc<WorkScheduler>,
pub realm_capability_host: Arc<RealmCapabilityHost>,
pub storage_admin_capability_host: Arc<StorageAdmin>,
pub hub: Arc<Hub>,
pub hub: Option<Arc<Hub>>,
pub builtin_runners: Vec<Arc<BuiltinRunner>>,
pub event_registry: Arc<EventRegistry>,
pub event_source_factory: Arc<EventSourceFactory>,
Expand All @@ -358,6 +379,7 @@ impl BuiltinEnvironment {
builtin_runners: Vec<Arc<BuiltinRunner>>,
utc_clock: Option<Arc<Clock>>,
inspector: Inspector,
enable_hub: bool,
) -> Result<BuiltinEnvironment, ModelError> {
let execution_mode = match runtime_config.debug {
true => ExecutionMode::Debug,
Expand Down Expand Up @@ -617,8 +639,13 @@ impl BuiltinEnvironment {
let stop_notifier = Arc::new(RootStopNotifier::new());
model.root.hooks.install(stop_notifier.hooks()).await;

let hub = Arc::new(Hub::new(root_component_url.as_str().to_owned())?);
model.root.hooks.install(hub.hooks()).await;
let hub = if enable_hub {
let hub = Arc::new(Hub::new(root_component_url.as_str().to_owned())?);
model.root.hooks.install(hub.hooks()).await;
Some(hub)
} else {
None
};

// Set up the Component Tree Diagnostics runtime statistics.
let component_tree_stats =
Expand Down Expand Up @@ -714,13 +741,19 @@ impl BuiltinEnvironment {
}

/// Setup a ServiceFs that contains the Hub and (optionally) the `EventSource` service.
async fn create_service_fs<'a>(&self) -> Result<ServiceFs<ServiceObj<'a, ()>>, ModelError> {
async fn create_service_fs<'a>(&self) -> Result<ServiceFs<ServiceObj<'a, ()>>, Error> {
if let None = self.hub {
bail!("Hub must be enabled if OutDirContents is not `None`");
}

// Create the ServiceFs
let mut service_fs = ServiceFs::new();

// Setup the hub
let (hub_proxy, hub_server_end) = create_proxy::<DirectoryMarker>().unwrap();
self.hub
.as_ref()
.unwrap()
.open_root(OPEN_RIGHT_READABLE | OPEN_RIGHT_WRITABLE, hub_server_end.into_channel())
.await?;
service_fs.add_remote("hub", hub_proxy);
Expand Down Expand Up @@ -750,7 +783,7 @@ impl BuiltinEnvironment {
}

/// Bind ServiceFs to a provided channel
async fn bind_service_fs(&mut self, channel: zx::Channel) -> Result<(), ModelError> {
async fn bind_service_fs(&mut self, channel: zx::Channel) -> Result<(), Error> {
let mut service_fs = self.create_service_fs().await?;

// Bind to the channel
Expand All @@ -770,7 +803,7 @@ impl BuiltinEnvironment {
}

/// Bind ServiceFs to the outgoing directory of this component, if it exists.
pub async fn bind_service_fs_to_out(&mut self) -> Result<(), ModelError> {
pub async fn bind_service_fs_to_out(&mut self) -> Result<(), Error> {
if let Some(handle) = fuchsia_runtime::take_startup_handle(
fuchsia_runtime::HandleType::DirectoryRequest.into(),
) {
Expand All @@ -786,7 +819,7 @@ impl BuiltinEnvironment {

/// Bind ServiceFs to a new channel and return the Hub directory.
/// Used mainly by integration tests.
pub async fn bind_service_fs_for_hub(&mut self) -> Result<DirectoryProxy, ModelError> {
pub async fn bind_service_fs_for_hub(&mut self) -> Result<DirectoryProxy, Error> {
// Create a channel that ServiceFs will operate on
let (service_fs_proxy, service_fs_server_end) = create_proxy::<DirectoryMarker>().unwrap();

Expand Down
28 changes: 13 additions & 15 deletions src/sys/component_manager/src/framework.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ impl CapabilityProvider for RealmCapabilityProvider {
Ok(fasync::Task::spawn(async move {
if let Err(e) = host.serve(component, stream).await {
// TODO: Set an epitaph to indicate this was an unexpected error.
warn!("serve failed: {:?}", e);
warn!("serve failed: {}", e);
}
})
.into())
Expand Down Expand Up @@ -110,7 +110,7 @@ impl RealmCapabilityHost {
let method_name = request.method_name();
let res = self.handle_request(request, &component).await;
if let Err(e) = &res {
error!("Error occurred sending Realm response for {}: {:?}", method_name, e);
error!("Error occurred sending Realm response for {}: {}", method_name, e);
}
res
})
Expand Down Expand Up @@ -167,10 +167,7 @@ impl RealmCapabilityHost {
ModelError::InstanceAlreadyExists { .. } => fcomponent::Error::InstanceAlreadyExists,
ModelError::CollectionNotFound { .. } => fcomponent::Error::CollectionNotFound,
ModelError::Unsupported { .. } => fcomponent::Error::Unsupported,
e => {
error!("add_dynamic_child() failed: {:?}", e);
fcomponent::Error::Internal
}
_ => fcomponent::Error::Internal,
})
}

Expand All @@ -183,8 +180,8 @@ impl RealmCapabilityHost {
let partial_moniker = PartialMoniker::new(child.name, child.collection);
let child = {
let state = component.lock_resolved_state().await.map_err(|e| match e {
ComponentInstanceError::ResolveFailed { moniker, err } => {
debug!("failed to resolve instance with moniker {}: {:?}", moniker, err);
ComponentInstanceError::ResolveFailed { moniker, err, .. } => {
debug!("failed to resolve instance with moniker {}: {}", moniker, err);
fcomponent::Error::InstanceCannotResolve
}
e => {
Expand All @@ -201,15 +198,15 @@ impl RealmCapabilityHost {
.await
.map_err(|e| match e {
ModelError::ResolverError { err, .. } => {
debug!("failed to resolve child: {:?}", err);
debug!("failed to resolve child: {}", err);
fcomponent::Error::InstanceCannotResolve
}
ModelError::RunnerError { err } => {
debug!("failed to start child: {:?}", err);
debug!("failed to start child: {}", err);
fcomponent::Error::InstanceCannotStart
}
e => {
error!("bind() failed: {:?}", e);
error!("bind() failed: {}", e);
fcomponent::Error::Internal
}
})?
Expand All @@ -231,11 +228,12 @@ impl RealmCapabilityHost {
()
}
Err(e) => {
error!("open_exposed() failed: {:?}", e);
debug!("open_exposed() failed: {}", e);
return Err(fcomponent::Error::Internal);
}
}
} else {
debug!("bind_child() failed: instance not found {:?}", child);
return Err(fcomponent::Error::InstanceNotFound);
}
Ok(())
Expand All @@ -253,7 +251,7 @@ impl RealmCapabilityHost {
ModelError::InstanceNotFoundInRealm { .. } => fcomponent::Error::InstanceNotFound,
ModelError::Unsupported { .. } => fcomponent::Error::Unsupported,
e => {
error!("remove_dynamic_child() failed: {:?}", e);
error!("remove_dynamic_child() failed: {}", e);
fcomponent::Error::Internal
}
})?;
Expand All @@ -274,7 +272,7 @@ impl RealmCapabilityHost {
) -> Result<(), fcomponent::Error> {
let component = component.upgrade().map_err(|_| fcomponent::Error::InstanceDied)?;
let state = component.lock_resolved_state().await.map_err(|e| {
error!("failed to resolve InstanceState: {:?}", e);
error!("failed to resolve InstanceState: {}", e);
fcomponent::Error::Internal
})?;
let decl = state.decl();
Expand Down Expand Up @@ -312,7 +310,7 @@ impl RealmCapabilityHost {
fasync::Task::spawn(async move {
if let Err(e) = Self::serve_child_iterator(children, stream, batch_size).await {
// TODO: Set an epitaph to indicate this was an unexpected error.
warn!("serve_child_iterator failed: {:?}", e);
warn!("serve_child_iterator failed: {}", e);
}
})
.detach();
Expand Down
6 changes: 1 addition & 5 deletions src/sys/component_manager/src/model/actions/delete_child.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use {
crate::model::{
actions::{Action, ActionKey, ActionSet, DestroyAction, MarkDeletedAction},
actions::{Action, ActionKey, ActionSet, DestroyAction},
component::{ComponentInstance, InstanceState},
error::ModelError,
hooks::{Event, EventPayload},
Expand Down Expand Up @@ -40,10 +40,6 @@ async fn do_delete_child(
component: &Arc<ComponentInstance>,
moniker: ChildMoniker,
) -> Result<(), ModelError> {
// Some paths may have already marked the child deleting before scheduling the DeleteChild
// action, in which case this is a no-op.
ActionSet::register(component.clone(), MarkDeletedAction::new(moniker.clone())).await?;

// The child may not exist or may already be deleted by a previous DeleteChild action.
let child = {
let state = component.lock_state().await;
Expand Down
Loading

0 comments on commit 7a9ff48

Please sign in to comment.