From 6eace1a46771eb76cdc8a7f1c24dd02a1cc7ee36 Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rwgk@google.com>
Date: Sun, 21 May 2023 12:54:25 -0700
Subject: [PATCH 1/6] First version adding `__notes__` to
 `error_already_set::what()` output.

---
 include/pybind11/pytypes.h | 60 +++++++++++++++++++++++++++++++++-----
 tests/test_exceptions.py   | 32 +++++++++++++++-----
 2 files changed, 77 insertions(+), 15 deletions(-)

diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h
index f11ed5da78..78ea882655 100644
--- a/include/pybind11/pytypes.h
+++ b/include/pybind11/pytypes.h
@@ -472,12 +472,15 @@ inline const char *obj_class_name(PyObject *obj) {
 std::string error_string();
 
 struct error_fetch_and_normalize {
-    // Immediate normalization is long-established behavior (starting with
-    // https://github.com/pybind/pybind11/commit/135ba8deafb8bf64a15b24d1513899eb600e2011
-    // from Sep 2016) and safest. Normalization could be deferred, but this could mask
-    // errors elsewhere, the performance gain is very minor in typical situations
-    // (usually the dominant bottleneck is EH unwinding), and the implementation here
-    // would be more complex.
+    // This comment only applies to Python <= 3.11:
+    //     Immediate normalization is long-established behavior (starting with
+    //     https://github.com/pybind/pybind11/commit/135ba8deafb8bf64a15b24d1513899eb600e2011
+    //     from Sep 2016) and safest. Normalization could be deferred, but this could mask
+    //     errors elsewhere, the performance gain is very minor in typical situations
+    //     (usually the dominant bottleneck is EH unwinding), and the implementation here
+    //     would be more complex.
+    // Starting with Python 3.12, PyErr_Fetch() normalizes exceptions immediately.
+    // Any errors during normalization are tracked under __notes__.
     explicit error_fetch_and_normalize(const char *called) {
         PyErr_Fetch(&m_type.ptr(), &m_value.ptr(), &m_trace.ptr());
         if (!m_type) {
@@ -492,6 +495,7 @@ struct error_fetch_and_normalize {
                             "of the original active exception type.");
         }
         m_lazy_error_string = exc_type_name_orig;
+#if PY_VERSION_HEX < 0x030C0000
         // PyErr_NormalizeException() may change the exception type if there are cascading
         // failures. This can potentially be extremely confusing.
         PyErr_NormalizeException(&m_type.ptr(), &m_value.ptr(), &m_trace.ptr());
@@ -506,12 +510,12 @@ struct error_fetch_and_normalize {
                           + " failed to obtain the name "
                             "of the normalized active exception type.");
         }
-#if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x07030a00
+#    if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x07030a00
         // This behavior runs the risk of masking errors in the error handling, but avoids a
         // conflict with PyPy, which relies on the normalization here to change OSError to
         // FileNotFoundError (https://github.com/pybind/pybind11/issues/4075).
         m_lazy_error_string = exc_type_name_norm;
-#else
+#    else
         if (exc_type_name_norm != m_lazy_error_string) {
             std::string msg = std::string(called)
                               + ": MISMATCH of original and normalized "
@@ -523,6 +527,12 @@ struct error_fetch_and_normalize {
             msg += ": " + format_value_and_trace();
             pybind11_fail(msg);
         }
+#    endif
+#else // Python 3.12+
+      // The presence of __notes__ could be due to exception normalization errors.
+        if (PyObject_HasAttrString(m_value.ptr(), "__notes__")) {
+            m_lazy_error_string += "[WITH __notes__]";
+        }
 #endif
     }
 
@@ -558,6 +568,40 @@ struct error_fetch_and_normalize {
                     }
                 }
             }
+#if PY_VERSION_HEX >= 0x030B0000
+            auto notes
+                = reinterpret_steal<object>(PyObject_GetAttrString(m_value.ptr(), "__notes__"));
+            if (!notes) {
+                PyErr_Clear(); // No notes is good news.
+            } else {
+                auto len_notes = PyList_Size(notes.ptr());
+                if (PyErr_Occurred()) {
+                    result += "\nFAILURE obtaining len(__notes__): " + detail::error_string();
+                } else {
+                    result += "\n__notes__ (len=" + std::to_string(len_notes) + "):";
+                    for (ssize_t i = 0; i < len_notes; i++) {
+                        PyObject *note = PyList_GET_ITEM(notes.ptr(), i);
+                        auto note_bytes = reinterpret_steal<object>(
+                            PyUnicode_AsEncodedString(note, "utf-8", "backslashreplace"));
+                        if (!note_bytes) {
+                            result += "\nFAILURE obtaining __notes__[" + std::to_string(i)
+                                      + "]: " + detail::error_string();
+                        } else {
+                            char *buffer = nullptr;
+                            Py_ssize_t length = 0;
+                            if (PyBytes_AsStringAndSize(note_bytes.ptr(), &buffer, &length)
+                                == -1) {
+                                result += "\nFAILURE formatting __notes__[" + std::to_string(i)
+                                          + "]: " + detail::error_string();
+                            } else {
+                                result += '\n';
+                                result += std::string(buffer, static_cast<std::size_t>(length));
+                            }
+                        }
+                    }
+                }
+            }
+#endif
         } else {
             result = "<MESSAGE UNAVAILABLE>";
         }
diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py
index a92ab59a34..24992fec0d 100644
--- a/tests/test_exceptions.py
+++ b/tests/test_exceptions.py
@@ -317,13 +317,7 @@ def test_error_already_set_what_with_happy_exceptions(
     assert what == expected_what
 
 
-@pytest.mark.skipif(
-    # Intentionally very specific:
-    "sys.version_info == (3, 12, 0, 'alpha', 7)",
-    reason="WIP: https://github.com/python/cpython/issues/102594",
-)
-@pytest.mark.skipif("env.PYPY", reason="PyErr_NormalizeException Segmentation fault")
-def test_flaky_exception_failure_point_init():
+def _test_flaky_exception_failure_point_init_before_py_3_12():
     with pytest.raises(RuntimeError) as excinfo:
         m.error_already_set_what(FlakyException, ("failure_point_init",))
     lines = str(excinfo.value).splitlines()
@@ -340,6 +334,30 @@ def test_flaky_exception_failure_point_init():
     assert lines[4].endswith("): test_flaky_exception_failure_point_init")
 
 
+def _test_flaky_exception_failure_point_init_py_3_12():
+    # Behavior change in Python 3.12: https://github.com/python/cpython/issues/102594
+    what, py_err_set_after_what = m.error_already_set_what(
+        FlakyException, ("failure_point_init",)
+    )
+    assert not py_err_set_after_what
+    lines = what.splitlines()
+    assert lines[0].endswith("ValueError[WITH __notes__]: triggered_failure_point_init")
+    assert lines[1] == "__notes__ (len=1):"
+    assert "Normalization failed:" in lines[2]
+    assert "FlakyException" in lines[2]
+
+
+@pytest.mark.skipif(
+    "env.PYPY and sys.version_info[:2] < (3, 12)",
+    reason="PyErr_NormalizeException Segmentation fault",
+)
+def test_flaky_exception_failure_point_init():
+    if sys.version_info[:2] < (3, 12):
+        _test_flaky_exception_failure_point_init_before_py_3_12()
+    else:
+        _test_flaky_exception_failure_point_init_py_3_12()
+
+
 def test_flaky_exception_failure_point_str():
     what, py_err_set_after_what = m.error_already_set_what(
         FlakyException, ("failure_point_str",)

From 5d349f6fdebe07484dc8015cc8ad51b4b143a34d Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rwgk@google.com>
Date: Sun, 21 May 2023 19:44:06 -0700
Subject: [PATCH 2/6] Fix trivial oversight (missing adjustment in existing
 test).

---
 tests/test_exceptions.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py
index 24992fec0d..ccac4536d6 100644
--- a/tests/test_exceptions.py
+++ b/tests/test_exceptions.py
@@ -331,7 +331,9 @@ def _test_flaky_exception_failure_point_init_before_py_3_12():
     # Checking the first two lines of the traceback as formatted in error_string():
     assert "test_exceptions.py(" in lines[3]
     assert lines[3].endswith("): __init__")
-    assert lines[4].endswith("): test_flaky_exception_failure_point_init")
+    assert lines[4].endswith(
+        "): _test_flaky_exception_failure_point_init_before_py_3_12"
+    )
 
 
 def _test_flaky_exception_failure_point_init_py_3_12():

From 5751217bcc1ed79a3998da54debfa93b0463e251 Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rwgk@google.com>
Date: Sun, 21 May 2023 20:27:57 -0700
Subject: [PATCH 3/6] Minor enhancements of new code.

---
 include/pybind11/pytypes.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h
index 78ea882655..a8061b5539 100644
--- a/include/pybind11/pytypes.h
+++ b/include/pybind11/pytypes.h
@@ -495,7 +495,14 @@ struct error_fetch_and_normalize {
                             "of the original active exception type.");
         }
         m_lazy_error_string = exc_type_name_orig;
-#if PY_VERSION_HEX < 0x030C0000
+#if PY_VERSION_HEX >= 0x030C0000
+        // The presence of __notes__ is likely due to exception normalization
+        // errors, although that is not necessarily true, therefore insert a
+        // hint only:
+        if (PyObject_HasAttrString(m_value.ptr(), "__notes__")) {
+            m_lazy_error_string += "[WITH __notes__]";
+        }
+#else
         // PyErr_NormalizeException() may change the exception type if there are cascading
         // failures. This can potentially be extremely confusing.
         PyErr_NormalizeException(&m_type.ptr(), &m_value.ptr(), &m_trace.ptr());
@@ -528,11 +535,6 @@ struct error_fetch_and_normalize {
             pybind11_fail(msg);
         }
 #    endif
-#else // Python 3.12+
-      // The presence of __notes__ could be due to exception normalization errors.
-        if (PyObject_HasAttrString(m_value.ptr(), "__notes__")) {
-            m_lazy_error_string += "[WITH __notes__]";
-        }
 #endif
     }
 
@@ -575,7 +577,7 @@ struct error_fetch_and_normalize {
                 PyErr_Clear(); // No notes is good news.
             } else {
                 auto len_notes = PyList_Size(notes.ptr());
-                if (PyErr_Occurred()) {
+                if (len_notes < 0) {
                     result += "\nFAILURE obtaining len(__notes__): " + detail::error_string();
                 } else {
                     result += "\n__notes__ (len=" + std::to_string(len_notes) + "):";

From 60816285e9e7b95b7d33a60805888b80b2bec641 Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rwgk@google.com>
Date: Sun, 21 May 2023 20:30:49 -0700
Subject: [PATCH 4/6] Re-enable `cmake --target cpptest -j 2`

---
 .github/workflows/upstream.yml | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/upstream.yml b/.github/workflows/upstream.yml
index 4acfbfce75..39b943b415 100644
--- a/.github/workflows/upstream.yml
+++ b/.github/workflows/upstream.yml
@@ -1,4 +1,3 @@
-
 name: Upstream
 
 on:
@@ -65,8 +64,8 @@ jobs:
     - name: Python tests C++11
       run: cmake --build build11 --target pytest -j 2
 
-    # - name: C++11 tests
-    #   run: cmake --build build11  --target cpptest -j 2
+    - name: C++11 tests
+      run: cmake --build build11  --target cpptest -j 2
 
     - name: Interface test C++11
       run: cmake --build build11 --target test_cmake_build
@@ -86,8 +85,8 @@ jobs:
     - name: Python tests
       run: cmake --build build17 --target pytest
 
-    # - name: C++ tests
-    #   run: cmake --build build17 --target cpptest
+    - name: C++ tests
+      run: cmake --build build17 --target cpptest
 
     # Third build - C++17 mode with unstable ABI
     - name: Configure (unstable ABI)

From 732fc7b09b49a03b63d22adadc3d7a3249ff2489 Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rwgk@google.com>
Date: Sun, 21 May 2023 20:56:17 -0700
Subject: [PATCH 5/6] Revert "Re-enable `cmake --target cpptest -j 2`"

This reverts commit 60816285e9e7b95b7d33a60805888b80b2bec641.
---
 .github/workflows/upstream.yml | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/upstream.yml b/.github/workflows/upstream.yml
index 39b943b415..4acfbfce75 100644
--- a/.github/workflows/upstream.yml
+++ b/.github/workflows/upstream.yml
@@ -1,3 +1,4 @@
+
 name: Upstream
 
 on:
@@ -64,8 +65,8 @@ jobs:
     - name: Python tests C++11
       run: cmake --build build11 --target pytest -j 2
 
-    - name: C++11 tests
-      run: cmake --build build11  --target cpptest -j 2
+    # - name: C++11 tests
+    #   run: cmake --build build11  --target cpptest -j 2
 
     - name: Interface test C++11
       run: cmake --build build11 --target test_cmake_build
@@ -85,8 +86,8 @@ jobs:
     - name: Python tests
       run: cmake --build build17 --target pytest
 
-    - name: C++ tests
-      run: cmake --build build17 --target cpptest
+    # - name: C++ tests
+    #   run: cmake --build build17 --target cpptest
 
     # Third build - C++17 mode with unstable ABI
     - name: Configure (unstable ABI)

From edd6026007326a280565fbe9d8a31284c1e7296f Mon Sep 17 00:00:00 2001
From: "Ralf W. Grosse-Kunstleve" <rwgk@google.com>
Date: Mon, 22 May 2023 04:43:41 -0700
Subject: [PATCH 6/6] Add general comment explaining why the
 `error_fetch_and_normalize` code is so unusual.

---
 include/pybind11/pytypes.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h
index a8061b5539..f5d3f34f38 100644
--- a/include/pybind11/pytypes.h
+++ b/include/pybind11/pytypes.h
@@ -471,6 +471,14 @@ inline const char *obj_class_name(PyObject *obj) {
 
 std::string error_string();
 
+// The code in this struct is very unusual, to minimize the chances of
+// masking bugs (elsewhere) by errors during the error handling (here).
+// This is meant to be a lifeline for troubleshooting long-running processes
+// that crash under conditions that are virtually impossible to reproduce.
+// Low-level implementation alternatives are preferred to higher-level ones
+// that might raise cascading exceptions. Last-ditch-kind-of attempts are made
+// to report as much of the original error as possible, even if there are
+// secondary issues obtaining some of the details.
 struct error_fetch_and_normalize {
     // This comment only applies to Python <= 3.11:
     //     Immediate normalization is long-established behavior (starting with