From 0693e77e3beafc7c6d6addab46be714a92ef0253 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 10 Nov 2023 09:55:52 +0100 Subject: [PATCH] gh-111262: Add PyDict_Pop() function _PyDict_Pop_KnownHash(): remove the default value and the return type becomes an int. Co-Authored-By: Stefan Behnel Co-authored-by: Antoine Pitrou --- Doc/c-api/dict.rst | 17 +++ Doc/data/stable_abi.dat | 1 + Doc/whatsnew/3.13.rst | 6 ++ Include/dictobject.h | 2 + Include/internal/pycore_dict.h | 6 +- Lib/test/test_capi/test_dict.py | 29 +++++ Lib/test/test_stable_abi_ctypes.py | 1 + ...-11-10-10-21-38.gh-issue-111262.2utB5m.rst | 4 + Misc/stable_abi.toml | 2 + Modules/_functoolsmodule.c | 28 ++--- Modules/_testcapi/dict.c | 23 +++- Modules/_threadmodule.c | 7 +- Modules/socketmodule.c | 7 +- Objects/dictobject.c | 102 ++++++++++++------ Objects/odictobject.c | 5 +- Objects/structseq.c | 15 ++- PC/python3dll.c | 1 + Python/import.c | 4 +- Python/sysmodule.c | 5 +- 19 files changed, 198 insertions(+), 67 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2023-11-10-10-21-38.gh-issue-111262.2utB5m.rst diff --git a/Doc/c-api/dict.rst b/Doc/c-api/dict.rst index 8ee351918006e4b..21f9305b7667635 100644 --- a/Doc/c-api/dict.rst +++ b/Doc/c-api/dict.rst @@ -173,6 +173,23 @@ Dictionary Objects .. versionadded:: 3.4 + +.. c:function:: int PyDict_Pop(PyObject *p, PyObject *key, PyObject **result) + + Remove *key* from dictionary *p* and optionally return the removed value. + Do not raise :exc:`KeyError` if the key missing. + + - If the key is present, set *\*result* to a new reference to the removed + value if *result* is not ``NULL``, and return ``1``. + - If the key is missing, set *\*result* to ``NULL`` if *result* is not + ``NULL``, and return ``0``. + - On error, raise an exception and return ``-1``. + + This is the similar to :meth:`dict.pop`, but without the default value and + do not raise :exc:`KeyError` if the key missing. + + .. versionadded:: 3.13 + .. c:function:: PyObject* PyDict_Items(PyObject *p) Return a :c:type:`PyListObject` containing all the items from the dictionary. diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 811b1bd84d24174..6f0c3ca3fdfc9c8 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -121,6 +121,7 @@ function,PyDict_Merge,3.2,, function,PyDict_MergeFromSeq2,3.2,, function,PyDict_New,3.2,, function,PyDict_Next,3.2,, +function,PyDict_Pop,3.13,, function,PyDict_SetItem,3.2,, function,PyDict_SetItemString,3.2,, function,PyDict_Size,3.2,, diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 9f9239a7eeb0368..3becd8e32699761 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1168,6 +1168,12 @@ New Features Python ``list.extend()`` and ``list.clear()`` methods. (Contributed by Victor Stinner in :gh:`111138`.) +* Add :c:func:`PyDict_Pop` function: remove a key from a dictionary and + optionally return the removed value. This is the similar to :meth:`dict.pop`, + but without the default value and do not raise :exc:`KeyError` if the key + missing. + (Contributed by Stefan Behnel and Victor Stinner in :gh:`111262`.) + Porting to Python 3.13 ---------------------- diff --git a/Include/dictobject.h b/Include/dictobject.h index 1bbeec1ab699e75..491914f513f3858 100644 --- a/Include/dictobject.h +++ b/Include/dictobject.h @@ -66,6 +66,8 @@ PyAPI_FUNC(int) PyDict_DelItemString(PyObject *dp, const char *key); // - On error, raise an exception and return -1. PyAPI_FUNC(int) PyDict_GetItemRef(PyObject *mp, PyObject *key, PyObject **result); PyAPI_FUNC(int) PyDict_GetItemStringRef(PyObject *mp, const char *key, PyObject **result); + +PyAPI_FUNC(int) PyDict_Pop(PyObject *dict, PyObject *key, PyObject **result); #endif #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000 diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index d01ef55de51f5d1..89f30a452c0c643 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -116,7 +116,11 @@ extern PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *); extern int _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value); extern int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value); -extern PyObject *_PyDict_Pop_KnownHash(PyObject *, PyObject *, Py_hash_t, PyObject *); +extern int _PyDict_Pop_KnownHash( + PyDictObject *dict, + PyObject *key, + Py_hash_t hash, + PyObject **result); #define DKIX_EMPTY (-1) #define DKIX_DUMMY (-2) /* Used internally */ diff --git a/Lib/test/test_capi/test_dict.py b/Lib/test/test_capi/test_dict.py index 67f12a56313b6f4..f404b3ef995692b 100644 --- a/Lib/test/test_capi/test_dict.py +++ b/Lib/test/test_capi/test_dict.py @@ -432,6 +432,35 @@ def test_dict_mergefromseq2(self): # CRASHES mergefromseq2({}, NULL, 0) # CRASHES mergefromseq2(NULL, {}, 0) + def test_dict_pop(self): + # Test PyDict_Pop() + dict_pop = _testcapi.dict_pop + + # key present + mydict = {"key": "value", "key2": "value2"} + self.assertEqual(dict_pop(mydict, "key"), (1, "value")) + self.assertEqual(mydict, {"key2": "value2"}) + self.assertEqual(dict_pop(mydict, "key2"), (1, "value2")) + self.assertEqual(mydict, {}) + + # key missing; empty dict has a fast path + self.assertEqual(dict_pop({}, "key"), (0, None)) + self.assertEqual(dict_pop({"a": 1}, "key"), (0, None)) + + # dict error + not_dict = "string" + self.assertRaises(SystemError, dict_pop, not_dict, "key") + + # key error; don't hash key if dict is empty + not_hashable_key = ["list"] + self.assertEqual(dict_pop({}, not_hashable_key), (0, None)) + with self.assertRaises(TypeError): + dict_pop({'key': 1}, not_hashable_key) + dict_pop({}, NULL) # key is not checked if dict is empty + + # CRASHES dict_pop(NULL, "key") + # CRASHES dict_pop({"a": 1}, NULL, default) + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 4976ac3642bbe46..6cd9eb5794c19a9 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -154,6 +154,7 @@ def test_windows_feature_macros(self): "PyDict_MergeFromSeq2", "PyDict_New", "PyDict_Next", + "PyDict_Pop", "PyDict_SetItem", "PyDict_SetItemString", "PyDict_Size", diff --git a/Misc/NEWS.d/next/C API/2023-11-10-10-21-38.gh-issue-111262.2utB5m.rst b/Misc/NEWS.d/next/C API/2023-11-10-10-21-38.gh-issue-111262.2utB5m.rst new file mode 100644 index 000000000000000..636905549f14473 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-11-10-10-21-38.gh-issue-111262.2utB5m.rst @@ -0,0 +1,4 @@ +Add :c:func:`PyDict_Pop` function: remove a key from a dictionary and +optionally return the removed value. This is the similar to :meth:`dict.pop`, +but without the default value and do not raise :exc:`KeyError` if the key +missing. Patch by Stefan Behnel and Victor Stinner. diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index 22b25dd0ec141fd..e6f464986fbb1fa 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2481,3 +2481,5 @@ [function._Py_SetRefcnt] added = '3.13' abi_only = true +[function.PyDict_Pop] + added = '3.13' diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index 8ea493ad9ab278e..ca440e4c70c0d9c 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -1087,19 +1087,9 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds The cache dict holds one reference to the link. We created one other reference when the link was created. The linked list only has borrowed references. */ - popresult = _PyDict_Pop_KnownHash(self->cache, link->key, - link->hash, Py_None); - if (popresult == Py_None) { - /* Getting here means that the user function call or another - thread has already removed the old key from the dictionary. - This link is now an orphan. Since we don't want to leave the - cache in an inconsistent state, we don't restore the link. */ - Py_DECREF(popresult); - Py_DECREF(link); - Py_DECREF(key); - return result; - } - if (popresult == NULL) { + int res = _PyDict_Pop_KnownHash((PyDictObject*)self->cache, link->key, + link->hash, &popresult); + if (res < 0) { /* An error arose while trying to remove the oldest key (the one being evicted) from the cache. We restore the link to its original position as the oldest link. Then we allow the @@ -1110,10 +1100,22 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds Py_DECREF(result); return NULL; } + if (res == 0) { + /* Getting here means that the user function call or another + thread has already removed the old key from the dictionary. + This link is now an orphan. Since we don't want to leave the + cache in an inconsistent state, we don't restore the link. */ + assert(popresult == NULL); + Py_DECREF(link); + Py_DECREF(key); + return result; + } + /* Keep a reference to the old key and old result to prevent their ref counts from going to zero during the update. That will prevent potentially arbitrary object clean-up code (i.e. __del__) from running while we're still adjusting the links. */ + assert(popresult != NULL); oldkey = link->key; oldresult = link->result; diff --git a/Modules/_testcapi/dict.c b/Modules/_testcapi/dict.c index 5f6a1a037dcba29..6ba19af636d647f 100644 --- a/Modules/_testcapi/dict.c +++ b/Modules/_testcapi/dict.c @@ -331,6 +331,27 @@ dict_mergefromseq2(PyObject *self, PyObject *args) } +static PyObject * +dict_pop(PyObject *self, PyObject *args) +{ + PyObject *dict, *key; + if (!PyArg_ParseTuple(args, "OO", &dict, &key)) { + return NULL; + } + NULLABLE(dict); + NULLABLE(key); + PyObject *result = UNINITIALIZED_PTR; + int res = PyDict_Pop(dict, key, &result); + if (res < 0) { + return NULL; + } + if (result == NULL) { + result = Py_NewRef(Py_None); + } + return Py_BuildValue("iN", res, result); +} + + static PyMethodDef test_methods[] = { {"dict_check", dict_check, METH_O}, {"dict_checkexact", dict_checkexact, METH_O}, @@ -358,7 +379,7 @@ static PyMethodDef test_methods[] = { {"dict_merge", dict_merge, METH_VARARGS}, {"dict_update", dict_update, METH_VARARGS}, {"dict_mergefromseq2", dict_mergefromseq2, METH_VARARGS}, - + {"dict_pop", dict_pop, METH_VARARGS}, {NULL}, }; diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 88ca9032b5e6798..c608789b5fbd5c8 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -967,11 +967,8 @@ local_clear(localobject *self) HEAD_UNLOCK(runtime); while (tstate) { if (tstate->dict) { - PyObject *v = _PyDict_Pop(tstate->dict, self->key, Py_None); - if (v != NULL) { - Py_DECREF(v); - } - else { + if (PyDict_Pop(tstate->dict, self->key, NULL) < 0) { + // Silently ignore error PyErr_Clear(); } } diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 97f4d1746667501..30a389d25e2e331 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -396,12 +396,11 @@ remove_unusable_flags(PyObject *m) if (flag_name == NULL) { return -1; } - PyObject *v = _PyDict_Pop(dict, flag_name, Py_None); - Py_DECREF(flag_name); - if (v == NULL) { + if (PyDict_Pop(dict, flag_name, NULL) < 0) { + Py_DECREF(flag_name); return -1; } - Py_DECREF(v); + Py_DECREF(flag_name); } } return 0; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 719d438897ca6cf..66a8e87d895d9ad 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2226,64 +2226,102 @@ PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) return _PyDict_Next(op, ppos, pkey, pvalue, NULL); } + /* Internal version of dict.pop(). */ -PyObject * -_PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *deflt) +int +_PyDict_Pop_KnownHash(PyDictObject *mp, PyObject *key, Py_hash_t hash, + PyObject **result) { - Py_ssize_t ix; - PyObject *old_value; - PyDictObject *mp; - PyInterpreterState *interp = _PyInterpreterState_GET(); - - assert(PyDict_Check(dict)); - mp = (PyDictObject *)dict; + assert(PyDict_Check(mp)); if (mp->ma_used == 0) { - if (deflt) { - return Py_NewRef(deflt); + if (result) { + *result = NULL; } - _PyErr_SetKeyError(key); - return NULL; + return 0; } - ix = _Py_dict_lookup(mp, key, hash, &old_value); - if (ix == DKIX_ERROR) - return NULL; + + PyObject *old_value; + Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &old_value); + if (ix == DKIX_ERROR) { + if (result) { + *result = NULL; + } + return -1; + } + if (ix == DKIX_EMPTY || old_value == NULL) { - if (deflt) { - return Py_NewRef(deflt); + if (result) { + *result = NULL; } - _PyErr_SetKeyError(key); - return NULL; + return 0; } + assert(old_value != NULL); + PyInterpreterState *interp = _PyInterpreterState_GET(); uint64_t new_version = _PyDict_NotifyEvent( interp, PyDict_EVENT_DELETED, mp, key, NULL); delitem_common(mp, hash, ix, Py_NewRef(old_value), new_version); ASSERT_CONSISTENT(mp); - return old_value; + if (result) { + *result = old_value; + } + else { + Py_DECREF(old_value); + } + return 1; } -PyObject * -_PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt) + +int +PyDict_Pop(PyObject *op, PyObject *key, PyObject **result) { - Py_hash_t hash; + if (!PyDict_Check(op)) { + if (result) { + *result = NULL; + } + PyErr_BadInternalCall(); + return -1; + } + PyDictObject *dict = (PyDictObject *)op; - if (((PyDictObject *)dict)->ma_used == 0) { - if (deflt) { - return Py_NewRef(deflt); + if (dict->ma_used == 0) { + if (result) { + *result = NULL; } - _PyErr_SetKeyError(key); - return NULL; + return 0; } + + Py_hash_t hash; if (!PyUnicode_CheckExact(key) || (hash = unicode_get_hash(key)) == -1) { hash = PyObject_Hash(key); - if (hash == -1) - return NULL; + if (hash == -1) { + if (result) { + *result = NULL; + } + return -1; + } } - return _PyDict_Pop_KnownHash(dict, key, hash, deflt); + return _PyDict_Pop_KnownHash(dict, key, hash, result); } + +PyObject * +_PyDict_Pop(PyObject *dict, PyObject *key, PyObject *default_value) +{ + PyObject *result; + if (PyDict_Pop(dict, key, &result) == 0) { + if (default_value != NULL) { + return Py_NewRef(default_value); + } + _PyErr_SetKeyError(key); + return NULL; + } + return result; +} + + /* Internal version of dict.from_keys(). It is subclass-friendly. */ PyObject * _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value) diff --git a/Objects/odictobject.c b/Objects/odictobject.c index b99896319e0136c..b5280c39e1be542 100644 --- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -1049,7 +1049,10 @@ _odict_popkey_hash(PyObject *od, PyObject *key, PyObject *failobj, return NULL; } /* Now delete the value from the dict. */ - value = _PyDict_Pop_KnownHash(od, key, hash, failobj); + if (_PyDict_Pop_KnownHash((PyDictObject *)od, key, hash, + &value) == 0) { + value = Py_NewRef(failobj); + } } else if (value == NULL && !PyErr_Occurred()) { /* Apply the fallback value, if necessary. */ diff --git a/Objects/structseq.c b/Objects/structseq.c index db4aebebdd8404e..d454a0d1335c358 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -8,7 +8,6 @@ */ #include "Python.h" -#include "pycore_dict.h" // _PyDict_Pop() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_modsupport.h" // _PyArg_NoPositional() #include "pycore_object.h" // _PyObject_GC_TRACK() @@ -421,12 +420,18 @@ structseq_replace(PyStructSequence *self, PyObject *args, PyObject *kwargs) if (!key) { goto error; } - PyObject *ob = _PyDict_Pop(kwargs, key, self->ob_item[i]); - Py_DECREF(key); - if (!ob) { + PyObject *ob; + if (PyDict_Pop(kwargs, key, &ob) < 0) { + Py_DECREF(key); goto error; } - result->ob_item[i] = ob; + Py_DECREF(key); + if (ob != NULL) { + result->ob_item[i] = ob; + } + else { + result->ob_item[i] = Py_NewRef(self->ob_item[i]); + } } // Check if there are any unexpected fields. if (PyDict_GET_SIZE(kwargs) > 0) { diff --git a/PC/python3dll.c b/PC/python3dll.c index 07aa84c91f9fc7f..8876f3607b863f6 100755 --- a/PC/python3dll.c +++ b/PC/python3dll.c @@ -184,6 +184,7 @@ EXPORT_FUNC(PyDict_Merge) EXPORT_FUNC(PyDict_MergeFromSeq2) EXPORT_FUNC(PyDict_New) EXPORT_FUNC(PyDict_Next) +EXPORT_FUNC(PyDict_Pop) EXPORT_FUNC(PyDict_SetItem) EXPORT_FUNC(PyDict_SetItemString) EXPORT_FUNC(PyDict_Size) diff --git a/Python/import.c b/Python/import.c index b6ffba5c5746e2d..b5e21202979c8bd 100644 --- a/Python/import.c +++ b/Python/import.c @@ -395,8 +395,8 @@ remove_module(PyThreadState *tstate, PyObject *name) PyObject *modules = MODULES(tstate->interp); if (PyDict_CheckExact(modules)) { - PyObject *mod = _PyDict_Pop(modules, name, Py_None); - Py_XDECREF(mod); + // Error is reported to the caller + (void)PyDict_Pop(modules, name, NULL); } else if (PyMapping_DelItem(modules, name) < 0) { if (_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) { diff --git a/Python/sysmodule.c b/Python/sysmodule.c index e28523284f15176..eee74a5c45e6bf9 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -125,11 +125,10 @@ sys_set_object(PyInterpreterState *interp, PyObject *key, PyObject *v) } PyObject *sd = interp->sysdict; if (v == NULL) { - v = _PyDict_Pop(sd, key, Py_None); - if (v == NULL) { + if (PyDict_Pop(sd, key, &v) < 0) { return -1; } - Py_DECREF(v); + Py_XDECREF(v); return 0; } else {