11// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
22// SPDX-License-Identifier: Apache-2.0
33
4+ //! Crash data collector process management for Unix socket communication.
5+ //!
6+ //! This module manages the collector process that writes crash data to Unix sockets.
7+ //! The collector runs in a forked child process and is responsible for serializing
8+ //! and transmitting crash information to the receiver process.
9+ //!
10+ //! ## Communication Flow (Collector Side)
11+ //!
12+ //! The collector performs these steps to transmit crash data:
13+ //!
14+ //! 1. **Process Setup**: Forks from crashing process, closes stdio, disables SIGPIPE
15+ //! 2. **Socket Creation**: Creates `UnixStream` from inherited file descriptor
16+ //! 3. **Data Serialization**: Calls [`emit_crashreport()`] to write structured crash data
17+ //! 4. **Graceful Exit**: Flushes data and exits with `libc::_exit(0)`
18+ //!
19+ //! ```text
20+ //! ┌─────────────────────┐                     ┌──────────────────────┐
21+ //! │ Signal Handler      │                     │ Collector Process    │
22+ //! │ (Original Process)  │                     │ (Forked Child)       │
23+ //! │                     │                     │                      │
24+ //! │ 1. Catch crash      │────fork()──────────►│ 2. Setup stdio       │
25+ //! │ 2. Fork collector   │                     │ 3. Create UnixStream │
26+ //! │ 3. Wait for child   │                     │ 4. Write crash data  │
27+ //! │                     │◄────wait()──────────│ 5. Exit cleanly      │
28+ //! └─────────────────────┘                     └──────────────────────┘
29+ //! ```
30+ //!
31+ //! ## Signal Safety
32+ //!
33+ //! The collector is forked from a signal handler, so all collector operations are
34+ //! restricted to async-signal-safe functions:
35+ //!
36+ //! - No memory allocations
37+ //! - Pre-prepared data structures
38+ //! - Only safe system calls
39+ //!
40+ //! For complete protocol documentation, see [`crate::shared::unix_socket_communication`].
41+ //!
42+ //! [`emit_crashreport()`]: crate::collector::emitters::emit_crashreport
43+
444use super :: process_handle:: ProcessHandle ;
545use super :: receiver_manager:: Receiver ;
646use ddcommon:: timeout:: TimeoutManager ;
@@ -25,6 +65,42 @@ pub enum CollectorSpawnError {
2565}
2666
2767impl Collector {
68+ /// Spawns a collector process to write crash data to the Unix socket.
69+ ///
70+ /// This method forks a child process that will serialize and transmit crash data
71+ /// to the receiver process via the Unix socket established in the receiver.
72+ ///
73+ /// ## Process Architecture
74+ ///
75+ /// ```text
76+ /// Parent Process (Signal Handler)   Child Process (Collector)
77+ /// ┌─────────────────────────────┐   ┌─────────────────────────────┐
78+ /// │ 1. Catches crash signal     │   │ 4. Closes stdio (0,1,2)     │
79+ /// │ 2. Forks collector process  │──►│ 5. Disables SIGPIPE         │
80+ /// │ 3. Returns to caller        │   │ 6. Creates UnixStream       │
81+ /// │                             │   │ 7. Calls emit_crashreport() │
82+ /// │                             │   │ 8. Exits with _exit(0)      │
83+ /// └─────────────────────────────┘   └─────────────────────────────┘
84+ /// ```
85+ ///
86+ /// ## Arguments
87+ ///
88+ /// * `receiver` - The receiver process that will read crash data from the Unix socket
89+ /// * `config` - Crash tracker configuration
90+ /// * `config_str` - JSON-serialized configuration string
91+ /// * `metadata_str` - JSON-serialized metadata string
92+ /// * `sig_info` - Signal information from the crash
93+ /// * `ucontext` - Process context at crash time
94+ ///
95+ /// ## Returns
96+ ///
97+ /// * `Ok(Collector)` - Handle to the spawned collector process
98+ /// * `Err(CollectorSpawnError::ForkFailed)` - If the fork operation fails
99+ ///
100+ /// ## Safety
101+ ///
102+ /// This function is called from signal handler context and uses only async-signal-safe operations.
103+ /// The child process performs all potentially unsafe operations after fork.
28104 pub ( crate ) fn spawn (
29105 receiver : & Receiver ,
30106 config : & CrashtrackerConfiguration ,
@@ -33,8 +109,8 @@ impl Collector {
33109 sig_info : * const siginfo_t ,
34110 ucontext : * const ucontext_t ,
35111 ) -> Result < Self , CollectorSpawnError > {
36- // When we spawn the child, our pid becomes the ppid.
37- // SAFETY: This function has no safety requirements .
112+ // Capture our pid before forking: in the child it becomes the ppid used for process tracking.
113+ // SAFETY: getpid() has no preconditions and is async-signal-safe.
38114 let pid = unsafe { libc:: getpid ( ) } ;
39115
40116 let fork_result = alt_fork ( ) ;
@@ -66,6 +142,42 @@ impl Collector {
66142 }
67143}
68144
145+ /// Collector child process entry point - serializes and transmits crash data via Unix socket.
146+ ///
147+ /// This function runs in the forked collector process and performs the actual crash data
148+ /// transmission. It establishes the Unix socket connection and writes all crash information
149+ /// using the structured protocol.
150+ ///
151+ /// ## Process Flow
152+ ///
153+ /// 1. **Isolate from parent**: Closes stdin, stdout, stderr to prevent interference
154+ /// 2. **Signal handling**: Disables SIGPIPE to handle broken pipe gracefully
155+ /// 3. **Socket setup**: Creates `UnixStream` from inherited file descriptor
156+ /// 4. **Data transmission**: Calls [`emit_crashreport()`] to write structured crash data
157+ /// 5. **Clean exit**: Exits with `_exit(0)` to avoid cleanup issues
158+ ///
159+ /// ## Communication Protocol
160+ ///
161+ /// The crash data is written as a structured stream with delimited sections:
162+ /// - Metadata, Configuration, Signal Info, Process Context
163+ /// - Counters, Spans, Tags, Traces, Memory Maps, Stack Trace
164+ /// - Completion marker
165+ ///
166+ /// For details, see [`crate::shared::unix_socket_communication`].
167+ ///
168+ /// ## Arguments
169+ ///
170+ /// * `config` - Crash tracker configuration object
171+ /// * `config_str` - JSON-serialized configuration for receiver
172+ /// * `metadata_str` - JSON-serialized metadata for receiver
173+ /// * `sig_info` - Signal information from crash context
174+ /// * `ucontext` - Processor context at crash time
175+ /// * `uds_fd` - Unix socket file descriptor for writing crash data
176+ /// * `ppid` - Parent process ID for identification
177+ ///
178+ /// This function never returns - it always exits via `_exit(0)` or `terminate()`.
179+ ///
180+ /// [`emit_crashreport()`]: crate::collector::emitters::emit_crashreport
69181pub ( crate ) fn run_collector_child (
70182 config : & CrashtrackerConfiguration ,
71183 config_str : & str ,
@@ -75,22 +187,24 @@ pub(crate) fn run_collector_child(
75187 uds_fd : RawFd ,
76188 ppid : libc:: pid_t ,
77189) -> ! {
78- // Close stdio
79- let _ = unsafe { libc:: close ( 0 ) } ;
80- let _ = unsafe { libc:: close ( 1 ) } ;
81- let _ = unsafe { libc:: close ( 2 ) } ;
190+ // Close stdio to isolate from parent process and prevent interference with crash data transmission
191+ let _ = unsafe { libc:: close ( 0 ) } ; // stdin
192+ let _ = unsafe { libc:: close ( 1 ) } ; // stdout
193+ let _ = unsafe { libc:: close ( 2 ) } ; // stderr
82194
83- // Disable SIGPIPE
195+ // Ignore SIGPIPE: if the receiver closes the socket early, writes should fail with an
196+ // error (EPIPE) that we can handle, rather than the signal killing this process.
84197 let _ = unsafe {
85198 signal:: sigaction (
86199 signal:: SIGPIPE ,
87200 & SigAction :: new ( SigHandler :: SigIgn , SaFlags :: empty ( ) , SigSet :: empty ( ) ) ,
88201 )
89202 } ;
90203
91- // Emit crashreport
204+ // Create Unix socket stream for crash data transmission
92205 let mut unix_stream = unsafe { UnixStream :: from_raw_fd ( uds_fd) } ;
93206
207+ // Serialize and transmit all crash data using structured protocol
94208 let report = emit_crashreport (
95209 & mut unix_stream,
96210 config,
0 commit comments