From 884f398637d697d2e1cf50a9ccd008d2757f2f16 Mon Sep 17 00:00:00 2001
From: Eric Snow <ericsnowcurrently@gmail.com>
Date: Tue, 1 Oct 2024 12:24:30 -0600
Subject: [PATCH 1/3] Revert "gh-124785: Revert "gh-116510: Fix crash due to
 shared immortal interned strings (gh-124646)" (gh-124807)"

This reverts commit 7bdfabe2d1ec353ecdc75a5aec41cce83e572391.
---
 ...-09-26-18-21-06.gh-issue-116510.FacUWO.rst |  5 ++
 Objects/unicodeobject.c                       | 48 ++++++++++++++++---
 2 files changed, 47 insertions(+), 6 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst
new file mode 100644
index 00000000000000..e3741321006548
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst
@@ -0,0 +1,5 @@
+Fix a crash caused by immortal interned strings being shared between
+sub-interpreters that use basic single-phase init.  In that case, the string
+can be used by an interpreter that outlives the interpreter that created and
+interned it.  For interpreters that share obmalloc state, also share the
+interned dict with the main interpreter.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index e9589cfe44f3bf..0f502ccdaf5767 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -282,13 +282,37 @@ hashtable_unicode_compare(const void *key1, const void *key2)
     }
 }
 
+/* Return true if this interpreter should share the main interpreter's
+   intern_dict.  That's important for interpreters which load basic
+   single-phase init extension modules (m_size == -1).  There could be interned
+   immortal strings that are shared between interpreters, due to the
+   PyDict_Update(mdict, m_copy) call in import_find_extension().
+
+   It's not safe to deallocate those strings until all interpreters that
+   potentially use them are freed.  By storing them in the main interpreter, we
+   ensure they get freed after all other interpreters are freed.
+*/
+static bool
+has_shared_intern_dict(PyInterpreterState *interp)
+{
+    PyInterpreterState *main_interp = _PyInterpreterState_Main();
+    return interp != main_interp  && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC;
+}
+
 static int
 init_interned_dict(PyInterpreterState *interp)
 {
     assert(get_interned_dict(interp) == NULL);
-    PyObject *interned = interned = PyDict_New();
-    if (interned == NULL) {
-        return -1;
+    PyObject *interned;
+    if (has_shared_intern_dict(interp)) {
+        interned = get_interned_dict(_PyInterpreterState_Main());
+        Py_INCREF(interned);
+    }
+    else {
+        interned = PyDict_New();
+        if (interned == NULL) {
+            return -1;
+        }
     }
     _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned;
     return 0;
@@ -299,7 +323,10 @@ clear_interned_dict(PyInterpreterState *interp)
 {
     PyObject *interned = get_interned_dict(interp);
     if (interned != NULL) {
-        PyDict_Clear(interned);
+        if (!has_shared_intern_dict(interp)) {
+            // only clear if the dict belongs to this interpreter
+            PyDict_Clear(interned);
+        }
         Py_DECREF(interned);
         _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
     }
@@ -15618,6 +15645,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
     }
     assert(PyDict_CheckExact(interned));
 
+    if (has_shared_intern_dict(interp)) {
+        // the dict doesn't belong to this interpreter, skip the debug
+        // checks on it and just clear the pointer to it
+        clear_interned_dict(interp);
+        return;
+    }
+
 #ifdef INTERNED_STATS
     fprintf(stderr, "releasing %zd interned strings\n",
             PyDict_GET_SIZE(interned));
@@ -16126,8 +16160,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
 {
     struct _Py_unicode_state *state = &interp->unicode;
 
-    // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
-    assert(get_interned_dict(interp) == NULL);
+    if (!has_shared_intern_dict(interp)) {
+        // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
+        assert(get_interned_dict(interp) == NULL);
+    }
 
     _PyUnicode_FiniEncodings(&state->fs_codec);
 

From 623fb87b6f8f5dad770cdd5c44fe1b65ae3265a3 Mon Sep 17 00:00:00 2001
From: Eric Snow <ericsnowcurrently@gmail.com>
Date: Tue, 1 Oct 2024 12:21:11 -0600
Subject: [PATCH 2/3] Move non-isolated immortal objects to the right ref
 chain.

---
 Objects/object.c        | 18 ++++++++++++++++++
 Objects/unicodeobject.c |  7 +++++++
 2 files changed, 25 insertions(+)

diff --git a/Objects/object.c b/Objects/object.c
index 8a819dd336e421..706498406f1fe7 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -2491,6 +2491,24 @@ _Py_ResurrectReference(PyObject *op)
 
 
 #ifdef Py_TRACE_REFS
+void
+_Py_NormalizeImmortalReference(PyObject *op)
+{
+    assert(_Py_IsImmortal(op));
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    if (!_PyRefchain_IsTraced(interp, op)) {
+        return;
+    }
+    PyInterpreterState *main_interp = _PyInterpreterState_Main();
+    if (interp != main_interp
+           && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC)
+    {
+        assert(!_PyRefchain_IsTraced(main_interp, op));
+        _PyRefchain_Remove(interp, op);
+        _PyRefchain_Trace(main_interp, op);
+    }
+}
+
 void
 _Py_ForgetReference(PyObject *op)
 {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 0f502ccdaf5767..efde81307168a2 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -15437,6 +15437,10 @@ _PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p)
     assert(*p);
 }
 
+#ifdef Py_TRACE_REFS
+extern void _Py_NormalizeImmortalReference(PyObject *);
+#endif
+
 static void
 immortalize_interned(PyObject *s)
 {
@@ -15452,6 +15456,9 @@ immortalize_interned(PyObject *s)
 #endif
     _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL;
     _Py_SetImmortal(s);
+#ifdef Py_TRACE_REFS
+    _Py_NormalizeImmortalReference(s);
+#endif
 }
 
 static /* non-null */ PyObject*

From fcc6a061240859ebf3a058cfe850b1a252b4dea3 Mon Sep 17 00:00:00 2001
From: Eric Snow <ericsnowcurrently@gmail.com>
Date: Tue, 1 Oct 2024 14:34:13 -0600
Subject: [PATCH 3/3] Add some comments.

---
 Objects/object.c        | 18 ++++++++++++++++++
 Objects/unicodeobject.c |  1 +
 2 files changed, 19 insertions(+)

diff --git a/Objects/object.c b/Objects/object.c
index 706498406f1fe7..8d809158a6c1da 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -2491,6 +2491,24 @@ _Py_ResurrectReference(PyObject *op)
 
 
 #ifdef Py_TRACE_REFS
+/* Make sure the ref is associated with the right interpreter.
+ * This only needs special attention for heap-allocated objects
+ * that have been immortalized, and only when the object might
+ * outlive the interpreter where it was created.  That means the
+ * object was necessarily created using a global allocator
+ * (i.e. from the main interpreter).  Thus in that specific case
+ * we move the object over to the main interpreter's refchain.
+ *
+ * This was added for the sake of the immortal interned strings,
+ * where legacy subinterpreters share the main interpreter's
+ * interned dict (and allocator), and therefore the strings can
+ * outlive the subinterpreter.
+ *
+ * It may make sense to fold this into _Py_SetImmortalUntracked(),
+ * but that requires further investigation.  In the meantime, it is
+ * up to the caller to know if this is needed.  There should be
+ * very few cases.
+ */
 void
 _Py_NormalizeImmortalReference(PyObject *op)
 {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index efde81307168a2..fb62b8aa3cb1a3 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -15457,6 +15457,7 @@ immortalize_interned(PyObject *s)
     _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL;
     _Py_SetImmortal(s);
 #ifdef Py_TRACE_REFS
+    /* Make sure the ref is associated with the right interpreter. */
     _Py_NormalizeImmortalReference(s);
 #endif
 }