From e37ed85cd50d8880325c8327654bd2c974b2927d Mon Sep 17 00:00:00 2001 From: Ben Frederickson Date: Sun, 25 Apr 2021 14:11:50 -0700 Subject: [PATCH] Tweak subprocess retry logic (#377) Change subprocess retry logic to handle the case where the main process is a python process and no children processes have been created yet. Also add a basic integration test for the case where python subprocesses are delayed in starting (from a non-python root process). --- src/sampler.rs | 49 ++++++++++++++++----------------- tests/integration_test.rs | 25 +++++++++++++---- tests/scripts/delayed_launch.sh | 2 ++ 3 files changed, 45 insertions(+), 31 deletions(-) create mode 100644 tests/scripts/delayed_launch.sh diff --git a/src/sampler.rs b/src/sampler.rs index 2b934701..ccf3ab3c 100644 --- a/src/sampler.rs +++ b/src/sampler.rs @@ -83,38 +83,35 @@ impl Sampler { /// Creates a new sampler object that samples any python process in the /// process or child processes fn new_subprocess_sampler(pid: Pid, config: &Config) -> Result { + let process = remoteprocess::Process::new(pid)?; + // Initialize a PythonSpy object per child, and build up the process tree let mut spies = HashMap::new(); - + let mut retries = 10; spies.insert(pid, PythonSpyThread::new(pid, None, &config)?); - let process = remoteprocess::Process::new(pid)?; - // Try a few times to get the process' children to give time to the process to start (?) - // Allows to properly get children on Windows 10 in a venv with -s flag - let children = { - let mut tmp_children = process.child_processes()?; - let mut retries = 10; - while retries > 0 && tmp_children.len() == 0 { - std::thread::sleep(std::time::Duration::from_millis(100)); - tmp_children = process.child_processes()?; - retries -= 1; + loop { + for (childpid, parentpid) in process.child_processes()? { + // If we can't create the child process, don't worry about it + // can happen with zombie child processes etc + match PythonSpyThread::new(childpid, Some(parentpid), &config) { + Ok(spy) => { spies.insert(childpid, spy); }, + Err(e) => { warn!("Failed to open process {}: {}", childpid, e); } + } } - tmp_children - }; - - for (childpid, parentpid) in children { - // If we can't create the child process, don't worry about it - // can happen with zombie child processes etc - match PythonSpyThread::new(childpid, Some(parentpid), &config) { - Ok(spy) => { spies.insert(childpid, spy); }, - Err(e) => { warn!("Failed to open process {}: {}", childpid, e); } + + // wait for all the various python spy objects to initialize, and break out of here + // if we have one of them started. + if spies.values_mut().any(|spy| spy.wait_initialized()) { + break; } - } - // wait for all the various python spy objects to initialize, and if none - // of them initialize appropiately fail right away - if spies.values_mut().all(|spy| !spy.wait_initialized()) { - return Err(format_err!("No python processes found in process {} or any of its subprocesses", pid)); + // Otherwise sleep for a short time and retry + retries -= 1; + if retries == 0 { + return Err(format_err!("No python processes found in process {} or any of its subprocesses", pid)); + } + std::thread::sleep(std::time::Duration::from_millis(100)); } // Create a new thread to periodically monitor for new child processes, and update @@ -327,4 +324,4 @@ fn get_process_info(pid: Pid, spies: &HashMap) -> Option ScriptRunner { - let child = std::process::Command::new("python").arg(filename).spawn().unwrap(); + fn new(process_name: &str, filename: &str) -> ScriptRunner { + let child = std::process::Command::new(process_name).arg(filename).spawn().unwrap(); ScriptRunner{child} } @@ -32,7 +32,7 @@ struct TestRunner { impl TestRunner { fn new(config: Config, filename: &str) -> TestRunner { - let child = ScriptRunner::new(filename); + let child = ScriptRunner::new("python", filename); std::thread::sleep(std::time::Duration::from_millis(400)); let spy = PythonSpy::retry_new(child.id(), &config, 20).unwrap(); TestRunner{child, spy} @@ -298,7 +298,7 @@ fn test_subprocesses() { // We used to not be able to create a sampler object if one of the child processes // was in a zombie state. Verify that this works now - let process = ScriptRunner::new("./tests/scripts/subprocesses.py"); + let process = ScriptRunner::new("python", "./tests/scripts/subprocesses.py"); std::thread::sleep(std::time::Duration::from_millis(1000)); let config = Config{subprocesses: true, ..Default::default()}; let sampler = py_spy::sampler::Sampler::new(process.id(), &config).unwrap(); @@ -335,7 +335,7 @@ fn test_subprocesses_zombiechild() { // We used to not be able to create a sampler object if one of the child processes // was in a zombie state. Verify that this works now - let process = ScriptRunner::new("./tests/scripts/subprocesses_zombie_child.py"); + let process = ScriptRunner::new("python", "./tests/scripts/subprocesses_zombie_child.py"); std::thread::sleep(std::time::Duration::from_millis(200)); let config = Config{subprocesses: true, ..Default::default()}; let _sampler = py_spy::sampler::Sampler::new(process.id(), &config).unwrap(); @@ -374,3 +374,18 @@ fn test_negative_linenumber_increment() { _ => panic!("Unknown python major version") } } + +#[cfg(target_os="linux")] +#[test] +fn test_delayed_subprocess() { + let process = ScriptRunner::new("bash", "./tests/scripts/delayed_launch.sh"); + let config = Config{subprocesses: true, ..Default::default()}; + let sampler = py_spy::sampler::Sampler::new(process.id(), &config).unwrap(); + for sample in sampler { + // should have one trace from the subprocess + let traces = sample.traces; + assert_eq!(traces.len(), 1); + assert!(traces[0].pid != process.id()); + break; + } +} diff --git a/tests/scripts/delayed_launch.sh b/tests/scripts/delayed_launch.sh new file mode 100644 index 00000000..c50177c0 --- /dev/null +++ b/tests/scripts/delayed_launch.sh @@ -0,0 +1,2 @@ +sleep 0.5 +python -c "import time; time.sleep(1000)"