From 324a609b4540b2e87cd81f361a117433b617b469 Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Wed, 17 Mar 2021 14:07:52 -0700 Subject: [PATCH] Fix a deadlock involve `scheduler.all_digests`, and add a note. (#11723) ### Problem By chance, a garbage collection thread started up and called `lease_files_in_graph` while I happened to be running Pants in the foreground. This caused a deadlock because `lease_files_in_graph` was not releasing the GIL before touching the `Graph` (via the `Scheduler`)... and foreground interactions with the `Graph` frequently need to acquire the GIL to check for equality. ### Solution Move the problematic call to `scheduler.all_digests()` under `allow_threads` and add a note. Additionally, move one other potentially-problematic method call under `allow_threads`. ### Result A rare (hopefully?) deadlock is prevented. See #11722 for how we might make this less likely in the future. [ci skip-build-wheels]# Delete this line to force CI to run the JVM tests. [ci skip-jvm-tests] --- src/rust/engine/src/externs/interface.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/rust/engine/src/externs/interface.rs b/src/rust/engine/src/externs/interface.rs index 872d87352f5..3348b68fb8e 100644 --- a/src/rust/engine/src/externs/interface.rs +++ b/src/rust/engine/src/externs/interface.rs @@ -1125,8 +1125,8 @@ fn graph_visualize( with_scheduler(py, scheduler_ptr, |scheduler| { with_session(py, session_ptr, |session| { let path = PathBuf::from(path); - scheduler - .visualize(session, path.as_path()) + // NB: See the note on with_scheduler re: allow_threads. + py.allow_threads(|| scheduler.visualize(session, path.as_path())) .map_err(|e| { let e = format!("Failed to visualize to {}: {:?}", path.display(), e); PyErr::new::(py, (e,)) @@ -1252,8 +1252,9 @@ fn lease_files_in_graph( ) -> PyUnitResult { with_scheduler(py, scheduler_ptr, |scheduler| { with_session(py, session_ptr, |session| { - let digests = scheduler.all_digests(session); + // NB: See the note on with_scheduler re: allow_threads. py.allow_threads(|| { + let digests = scheduler.all_digests(session); scheduler .core .executor @@ -1700,6 +1701,11 @@ where /// context methods provide immutable references. The remaining types are not intended to be shared /// between threads, so mutable access is provided. /// +/// TODO: The `Scheduler` and `Session` objects both have lots of internal locks: in general, the GIL +/// should be released (using `py.allow_thread(|| ..)`) before any non-trivial interactions with +/// them. In particular: methods that use the `Graph` should be called outside the GIL. We should +/// make this less error prone: see https://github.com/pantsbuild/pants/issues/11722. +/// fn with_scheduler(py: Python, scheduler_ptr: PyScheduler, f: F) -> T where F: FnOnce(&Scheduler) -> T,