Skip to content

Commit 646869b

Browse files
authored
Merge pull request #386 from mxgrey/rcl_lifecycles
Correct rcl entity lifecycles and fix spurious test failures
2 parents 0578a76 + 58b2c66 commit 646869b

15 files changed

+512
-263
lines changed

rclrs/Cargo.toml

+4-1
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,18 @@ path = "src/lib.rs"
1414
# Please keep the list of dependencies alphabetically sorted,
1515
# and also state why each dependency is needed.
1616
[dependencies]
17-
# Needed for dynamically finding type support libraries
17+
# Needed for dynamically finding type support libraries
1818
ament_rs = { version = "0.2", optional = true }
19+
1920
# Needed for uploading documentation to docs.rs
2021
cfg-if = "1.0.0"
2122

2223
# Needed for clients
2324
futures = "0.3"
25+
2426
# Needed for dynamic messages
2527
libloading = { version = "0.8", optional = true }
28+
2629
# Needed for the Message trait, among others
2730
rosidl_runtime_rs = "0.4"
2831

rclrs/src/client.rs

+41-27
Original file line numberDiff line numberDiff line change
@@ -9,32 +9,38 @@ use rosidl_runtime_rs::Message;
99

1010
use crate::error::{RclReturnCode, ToResult};
1111
use crate::MessageCow;
12-
use crate::{rcl_bindings::*, RclrsError};
12+
use crate::{rcl_bindings::*, NodeHandle, RclrsError, ENTITY_LIFECYCLE_MUTEX};
1313

1414
// SAFETY: The functions accessing this type, including drop(), shouldn't care about the thread
1515
// they are running in. Therefore, this type can be safely sent to another thread.
1616
unsafe impl Send for rcl_client_t {}
1717

18-
/// Internal struct used by clients.
18+
/// Manage the lifecycle of an `rcl_client_t`, including managing its dependencies
19+
/// on `rcl_node_t` and `rcl_context_t` by ensuring that these dependencies are
20+
/// [dropped after][1] the `rcl_client_t`.
21+
///
22+
/// [1]: <https://doc.rust-lang.org/reference/destructors.html>
1923
pub struct ClientHandle {
20-
rcl_client_mtx: Mutex<rcl_client_t>,
21-
rcl_node_mtx: Arc<Mutex<rcl_node_t>>,
24+
rcl_client: Mutex<rcl_client_t>,
25+
node_handle: Arc<NodeHandle>,
2226
pub(crate) in_use_by_wait_set: Arc<AtomicBool>,
2327
}
2428

2529
impl ClientHandle {
2630
pub(crate) fn lock(&self) -> MutexGuard<rcl_client_t> {
27-
self.rcl_client_mtx.lock().unwrap()
31+
self.rcl_client.lock().unwrap()
2832
}
2933
}
3034

3135
impl Drop for ClientHandle {
3236
fn drop(&mut self) {
33-
let rcl_client = self.rcl_client_mtx.get_mut().unwrap();
34-
let rcl_node_mtx = &mut *self.rcl_node_mtx.lock().unwrap();
35-
// SAFETY: No preconditions for this function
37+
let rcl_client = self.rcl_client.get_mut().unwrap();
38+
let mut rcl_node = self.node_handle.rcl_node.lock().unwrap();
39+
let _lifecycle_lock = ENTITY_LIFECYCLE_MUTEX.lock().unwrap();
40+
// SAFETY: The entity lifecycle mutex is locked to protect against the risk of
41+
// global variables in the rmw implementation being unsafely modified during cleanup.
3642
unsafe {
37-
rcl_client_fini(rcl_client, rcl_node_mtx);
43+
rcl_client_fini(rcl_client, &mut *rcl_node);
3844
}
3945
}
4046
}
@@ -74,7 +80,7 @@ where
7480
T: rosidl_runtime_rs::Service,
7581
{
7682
/// Creates a new client.
77-
pub(crate) fn new(rcl_node_mtx: Arc<Mutex<rcl_node_t>>, topic: &str) -> Result<Self, RclrsError>
83+
pub(crate) fn new(node_handle: Arc<NodeHandle>, topic: &str) -> Result<Self, RclrsError>
7884
// This uses pub(crate) visibility to avoid instantiating this struct outside
7985
// [`Node::create_client`], see the struct's documentation for the rationale
8086
where
@@ -92,24 +98,32 @@ where
9298
// SAFETY: No preconditions for this function.
9399
let client_options = unsafe { rcl_client_get_default_options() };
94100

95-
unsafe {
96-
// SAFETY: The rcl_client is zero-initialized as expected by this function.
97-
// The rcl_node is kept alive because it is co-owned by the client.
98-
// The topic name and the options are copied by this function, so they can be dropped
99-
// afterwards.
100-
rcl_client_init(
101-
&mut rcl_client,
102-
&*rcl_node_mtx.lock().unwrap(),
103-
type_support,
104-
topic_c_string.as_ptr(),
105-
&client_options,
106-
)
107-
.ok()?;
101+
{
102+
let rcl_node = node_handle.rcl_node.lock().unwrap();
103+
let _lifecycle_lock = ENTITY_LIFECYCLE_MUTEX.lock().unwrap();
104+
105+
// SAFETY:
106+
// * The rcl_client was zero-initialized as expected by this function.
107+
// * The rcl_node is kept alive by the NodeHandle because it is a dependency of the client.
108+
// * The topic name and the options are copied by this function, so they can be dropped
109+
// afterwards.
110+
// * The entity lifecycle mutex is locked to protect against the risk of global
111+
// variables in the rmw implementation being unsafely modified during initialization.
112+
unsafe {
113+
rcl_client_init(
114+
&mut rcl_client,
115+
&*rcl_node,
116+
type_support,
117+
topic_c_string.as_ptr(),
118+
&client_options,
119+
)
120+
.ok()?;
121+
}
108122
}
109123

110124
let handle = Arc::new(ClientHandle {
111-
rcl_client_mtx: Mutex::new(rcl_client),
112-
rcl_node_mtx,
125+
rcl_client: Mutex::new(rcl_client),
126+
node_handle,
113127
in_use_by_wait_set: Arc::new(AtomicBool::new(false)),
114128
});
115129

@@ -245,8 +259,8 @@ where
245259
///
246260
pub fn service_is_ready(&self) -> Result<bool, RclrsError> {
247261
let mut is_ready = false;
248-
let client = &mut *self.handle.rcl_client_mtx.lock().unwrap();
249-
let node = &mut *self.handle.rcl_node_mtx.lock().unwrap();
262+
let client = &mut *self.handle.rcl_client.lock().unwrap();
263+
let node = &mut *self.handle.node_handle.rcl_node.lock().unwrap();
250264

251265
unsafe {
252266
// SAFETY: both node and client are guaranteed to be valid here

rclrs/src/context.rs

+143-25
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,31 @@ use std::vec::Vec;
77
use crate::rcl_bindings::*;
88
use crate::{RclrsError, ToResult};
99

10+
/// This is locked whenever initializing or dropping any middleware entity
11+
/// because we have found issues in RCL and some RMW implementations that
12+
/// make it unsafe to simultaneously initialize and/or drop middleware
13+
/// entities such as `rcl_context_t` and `rcl_node_t` as well as middleware
14+
/// primitives such as `rcl_publisher_t`, `rcl_subscription_t`, etc.
15+
/// It seems these C and C++ based libraries will regularly use
16+
/// unprotected global variables in their object initialization and cleanup.
17+
///
18+
/// Further discussion with the RCL team may help to improve the RCL
19+
/// documentation to specifically call out where these risks are present. For
20+
/// now we lock this mutex for any RCL function that carries reasonable suspicion
21+
/// of a risk.
22+
pub(crate) static ENTITY_LIFECYCLE_MUTEX: Mutex<()> = Mutex::new(());
23+
1024
impl Drop for rcl_context_t {
1125
fn drop(&mut self) {
1226
unsafe {
1327
// The context may be invalid when rcl_init failed, e.g. because of invalid command
1428
// line arguments.
15-
// SAFETY: No preconditions for this function.
29+
30+
// SAFETY: No preconditions for rcl_context_is_valid.
1631
if rcl_context_is_valid(self) {
17-
// SAFETY: These functions have no preconditions besides a valid rcl_context
32+
let _lifecycle_lock = ENTITY_LIFECYCLE_MUTEX.lock().unwrap();
33+
// SAFETY: The entity lifecycle mutex is locked to protect against the risk of
34+
// global variables in the rmw implementation being unsafely modified during cleanup.
1835
rcl_shutdown(self);
1936
rcl_context_fini(self);
2037
}
@@ -39,16 +56,26 @@ unsafe impl Send for rcl_context_t {}
3956
/// - the allocator used (left as the default by `rclrs`)
4057
///
4158
pub struct Context {
42-
pub(crate) rcl_context_mtx: Arc<Mutex<rcl_context_t>>,
59+
pub(crate) handle: Arc<ContextHandle>,
60+
}
61+
62+
/// This struct manages the lifetime and access to the `rcl_context_t`. It will also
63+
/// account for the lifetimes of any dependencies, if we need to add
64+
/// dependencies in the future (currently there are none). It is not strictly
65+
/// necessary to decompose `Context` and `ContextHandle` like this, but we are
66+
/// doing it to be consistent with the lifecycle management of other rcl
67+
/// bindings in this library.
68+
pub(crate) struct ContextHandle {
69+
pub(crate) rcl_context: Mutex<rcl_context_t>,
4370
}
4471

4572
impl Context {
4673
/// Creates a new context.
4774
///
48-
/// Usually, this would be called with `std::env::args()`, analogously to `rclcpp::init()`.
75+
/// Usually this would be called with `std::env::args()`, analogously to `rclcpp::init()`.
4976
/// See also the official "Passing ROS arguments to nodes via the command-line" tutorial.
5077
///
51-
/// Creating a context can fail in case the args contain invalid ROS arguments.
78+
/// Creating a context will fail if the args contain invalid ROS arguments.
5279
///
5380
/// # Example
5481
/// ```
@@ -58,6 +85,21 @@ impl Context {
5885
/// assert!(Context::new(invalid_remapping).is_err());
5986
/// ```
6087
pub fn new(args: impl IntoIterator<Item = String>) -> Result<Self, RclrsError> {
88+
Self::new_with_options(args, InitOptions::new())
89+
}
90+
91+
/// Same as [`Context::new`] except you can additionally provide initialization options.
92+
///
93+
/// # Example
94+
/// ```
95+
/// use rclrs::{Context, InitOptions};
96+
/// let context = Context::new_with_options([], InitOptions::new().with_domain_id(Some(5))).unwrap();
97+
/// assert_eq!(context.domain_id(), 5);
98+
/// ```
99+
pub fn new_with_options(
100+
args: impl IntoIterator<Item = String>,
101+
options: InitOptions,
102+
) -> Result<Self, RclrsError> {
61103
// SAFETY: Getting a zero-initialized value is always safe
62104
let mut rcl_context = unsafe { rcl_get_zero_initialized_context() };
63105
let cstring_args: Vec<CString> = args
@@ -74,48 +116,124 @@ impl Context {
74116
unsafe {
75117
// SAFETY: No preconditions for this function.
76118
let allocator = rcutils_get_default_allocator();
77-
// SAFETY: Getting a zero-initialized value is always safe.
78-
let mut rcl_init_options = rcl_get_zero_initialized_init_options();
79-
// SAFETY: Passing in a zero-initialized value is expected.
80-
// In the case where this returns not ok, there's nothing to clean up.
81-
rcl_init_options_init(&mut rcl_init_options, allocator).ok()?;
82-
// SAFETY: This function does not store the ephemeral init_options and c_args
83-
// pointers. Passing in a zero-initialized rcl_context is expected.
84-
let ret = rcl_init(
85-
c_args.len() as i32,
86-
if c_args.is_empty() {
87-
std::ptr::null()
88-
} else {
89-
c_args.as_ptr()
90-
},
91-
&rcl_init_options,
92-
&mut rcl_context,
93-
)
94-
.ok();
119+
let mut rcl_init_options = options.into_rcl(allocator)?;
120+
// SAFETY:
121+
// * This function does not store the ephemeral init_options and c_args pointers.
122+
// * Passing in a zero-initialized rcl_context is mandatory.
123+
// * The entity lifecycle mutex is locked to protect against the risk of global variables
124+
// in the rmw implementation being unsafely modified during initialization.
125+
let ret = {
126+
let _lifecycle_lock = ENTITY_LIFECYCLE_MUTEX.lock().unwrap();
127+
rcl_init(
128+
c_args.len() as i32,
129+
if c_args.is_empty() {
130+
std::ptr::null()
131+
} else {
132+
c_args.as_ptr()
133+
},
134+
&rcl_init_options,
135+
&mut rcl_context,
136+
)
137+
.ok()
138+
};
95139
// SAFETY: It's safe to pass in an initialized object.
96140
// Early return will not leak memory, because this is the last fini function.
97141
rcl_init_options_fini(&mut rcl_init_options).ok()?;
98142
// Move the check after the last fini()
99143
ret?;
100144
}
101145
Ok(Self {
102-
rcl_context_mtx: Arc::new(Mutex::new(rcl_context)),
146+
handle: Arc::new(ContextHandle {
147+
rcl_context: Mutex::new(rcl_context),
148+
}),
103149
})
104150
}
105151

152+
/// Returns the ROS domain ID that the context is using.
153+
///
154+
/// The domain ID controls which nodes can send messages to each other, see the [ROS 2 concept article][1].
155+
/// It can be set through the `ROS_DOMAIN_ID` environment variable or through `InitOptions::with_domain_id`.
156+
///
157+
/// [1]: https://docs.ros.org/en/rolling/Concepts/Intermediate/About-Domain-ID.html
158+
pub fn domain_id(&self) -> usize {
159+
let mut domain_id: usize = 0;
160+
let ret = unsafe {
161+
rcl_context_get_domain_id(
162+
&mut *self.handle.rcl_context.lock().unwrap(),
163+
&mut domain_id,
164+
)
165+
};
166+
167+
debug_assert_eq!(ret, 0);
168+
domain_id
169+
}
170+
106171
/// Checks if the context is still valid.
107172
///
108173
/// This will return `false` when a signal has caused the context to shut down (currently
109174
/// unimplemented).
110175
pub fn ok(&self) -> bool {
111176
// This will currently always return true, but once we have a signal handler, the signal
112177
// handler could call `rcl_shutdown()`, hence making the context invalid.
113-
let rcl_context = &mut *self.rcl_context_mtx.lock().unwrap();
178+
let rcl_context = &mut *self.handle.rcl_context.lock().unwrap();
114179
// SAFETY: No preconditions for this function.
115180
unsafe { rcl_context_is_valid(rcl_context) }
116181
}
117182
}
118183

184+
/// Additional options for initializing the Context.
185+
#[derive(Default, Clone)]
186+
pub struct InitOptions {
187+
/// The domain ID that should be used by the Context. Set to None to ask for
188+
/// the default behavior, which is to set the domain ID according to the
189+
/// [ROS_DOMAIN_ID][1] environment variable.
190+
///
191+
/// [1]: https://docs.ros.org/en/rolling/Concepts/Intermediate/About-Domain-ID.html#the-ros-domain-id
192+
domain_id: Option<usize>,
193+
}
194+
195+
impl InitOptions {
196+
/// Create a new InitOptions with all default values.
197+
pub fn new() -> InitOptions {
198+
Self::default()
199+
}
200+
201+
/// Transform an InitOptions into a new one with a certain domain_id
202+
pub fn with_domain_id(mut self, domain_id: Option<usize>) -> InitOptions {
203+
self.domain_id = domain_id;
204+
self
205+
}
206+
207+
/// Set the domain_id of an InitOptions, or reset it to the default behavior
208+
/// (determined by environment variables) by providing None.
209+
pub fn set_domain_id(&mut self, domain_id: Option<usize>) {
210+
self.domain_id = domain_id;
211+
}
212+
213+
/// Get the domain_id that will be provided by these InitOptions.
214+
pub fn domain_id(&self) -> Option<usize> {
215+
self.domain_id
216+
}
217+
218+
fn into_rcl(self, allocator: rcutils_allocator_s) -> Result<rcl_init_options_t, RclrsError> {
219+
unsafe {
220+
// SAFETY: Getting a zero-initialized value is always safe.
221+
let mut rcl_init_options = rcl_get_zero_initialized_init_options();
222+
// SAFETY: Passing in a zero-initialized value is expected.
223+
// In the case where this returns not ok, there's nothing to clean up.
224+
rcl_init_options_init(&mut rcl_init_options, allocator).ok()?;
225+
226+
// We only need to set the domain_id if the user asked for something
227+
// other than None. When the user asks for None, that is equivalent
228+
// to the default value in rcl_init_options.
229+
if let Some(domain_id) = self.domain_id {
230+
rcl_init_options_set_domain_id(&mut rcl_init_options, domain_id);
231+
}
232+
Ok(rcl_init_options)
233+
}
234+
}
235+
}
236+
119237
#[cfg(test)]
120238
mod tests {
121239
use super::*;

rclrs/src/executor.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ impl SingleThreadedExecutor {
4242
for node in { self.nodes_mtx.lock().unwrap() }
4343
.iter()
4444
.filter_map(Weak::upgrade)
45-
.filter(|node| unsafe { rcl_context_is_valid(&*node.rcl_context_mtx.lock().unwrap()) })
45+
.filter(|node| unsafe {
46+
rcl_context_is_valid(&*node.handle.context_handle.rcl_context.lock().unwrap())
47+
})
4648
{
4749
let wait_set = WaitSet::new_for_node(&node)?;
4850
let ready_entities = wait_set.wait(timeout)?;

0 commit comments

Comments
 (0)