diff --git a/Doc/library/bisect.rst b/Doc/library/bisect.rst index 6bf7814b257f4a..3cede24bdc8297 100644 --- a/Doc/library/bisect.rst +++ b/Doc/library/bisect.rst @@ -21,21 +21,36 @@ example of the algorithm (the boundary conditions are already right!). The following functions are provided: -.. function:: bisect_left(a, x, lo=0, hi=len(a)) +.. function:: bisect_left(a, x, lo=0, hi=len(a), *, key=None, reverse=False) Locate the insertion point for *x* in *a* to maintain sorted order. The parameters *lo* and *hi* may be used to specify a subset of the list - which should be considered; by default the entire list is used. If *x* is - already present in *a*, the insertion point will be before (to the left of) - any existing entries. The return value is suitable for use as the first - parameter to ``list.insert()`` assuming that *a* is already sorted. + which should be considered; by default the entire list is used. + + The parameter *key* specifies a function of one argument that is used to + extract a comparison key from each element in *a* and from *x* (for example, + ``key=str.lower``). The default value is ``None`` (compare the elements + directly). + + .. note:: + + When specifying a custom *key* function, you should wrap it with + :func:`functools.lru_cache` if the *key* function is not already fast. + + The parameter *reverse* is a boolean value. If set to ``True``, the list is + assumed to be sorted in descending order. + + If *x* is already present in *a*, the insertion point will be before + (to the left of) any existing entries. The return value is suitable for + use as the first parameter to ``list.insert()`` assuming that *a* is + already sorted according to *key*. The returned insertion point *i* partitions the array *a* into two halves so that ``all(val < x for val in a[lo:i])`` for the left side and ``all(val >= x for val in a[i:hi])`` for the right side. -.. function:: bisect_right(a, x, lo=0, hi=len(a)) - bisect(a, x, lo=0, hi=len(a)) +.. 
function:: bisect_right(a, x, lo=0, hi=len(a), *, key=None, reverse=False) + bisect(a, x, lo=0, hi=len(a), *, key=None, reverse=False) Similar to :func:`bisect_left`, but returns an insertion point which comes after (to the right of) any existing entries of *x* in *a*. @@ -44,15 +59,15 @@ The following functions are provided: that ``all(val <= x for val in a[lo:i])`` for the left side and ``all(val > x for val in a[i:hi])`` for the right side. -.. function:: insort_left(a, x, lo=0, hi=len(a)) +.. function:: insort_left(a, x, lo=0, hi=len(a), *, key=None, reverse=False) Insert *x* in *a* in sorted order. This is equivalent to ``a.insert(bisect.bisect_left(a, x, lo, hi), x)`` assuming that *a* is already sorted. Keep in mind that the O(log n) search is dominated by the slow O(n) insertion step. -.. function:: insort_right(a, x, lo=0, hi=len(a)) - insort(a, x, lo=0, hi=len(a)) +.. function:: insort_right(a, x, lo=0, hi=len(a), *, key=None, reverse=False) + insort(a, x, lo=0, hi=len(a), *, key=None, reverse=False) Similar to :func:`insort_left`, but inserting *x* in *a* after any existing entries of *x*. @@ -74,37 +89,37 @@ can be tricky or awkward to use for common searching tasks. 
The following five functions show how to transform them into the standard lookups for sorted lists:: - def index(a, x): + def index(a, x, *, key=None, reverse=False): 'Locate the leftmost value exactly equal to x' - i = bisect_left(a, x) + i = bisect_left(a, x, key=key) if i != len(a) and a[i] == x: return i raise ValueError - def find_lt(a, x): + def find_lt(a, x, *, key=None, reverse=False): 'Find rightmost value less than x' - i = bisect_left(a, x) + i = bisect_left(a, x, key=key) if i: return a[i-1] raise ValueError - def find_le(a, x): + def find_le(a, x, *, key=None, reverse=False): 'Find rightmost value less than or equal to x' - i = bisect_right(a, x) + i = bisect_right(a, x, key=key) if i: return a[i-1] raise ValueError - def find_gt(a, x): + def find_gt(a, x, *, key=None, reverse=False): 'Find leftmost value greater than x' - i = bisect_right(a, x) + i = bisect_right(a, x, key=key) if i != len(a): return a[i] raise ValueError - def find_ge(a, x): + def find_ge(a, x, *, key=None, reverse=False): 'Find leftmost item greater than or equal to x' - i = bisect_left(a, x) + i = bisect_left(a, x, key=key) if i != len(a): return a[i] raise ValueError diff --git a/Doc/tools/susp-ignored.csv b/Doc/tools/susp-ignored.csv index 85263d47c8bba8..72fd636ade626e 100644 --- a/Doc/tools/susp-ignored.csv +++ b/Doc/tools/susp-ignored.csv @@ -106,8 +106,8 @@ howto/regex,,::, howto/regex,,:foo,(?:foo) howto/urllib2,,:password,"""joe:password@example.com""" library/audioop,,:ipos,"# factor = audioop.findfactor(in_test[ipos*2:ipos*2+len(out_test)]," -library/bisect,32,:hi,all(val >= x for val in a[i:hi]) -library/bisect,42,:hi,all(val > x for val in a[i:hi]) +library/bisect,,:hi,all(val >= x for val in a[i:hi]) +library/bisect,,:hi,all(val > x for val in a[i:hi]) library/configparser,,:home,my_dir: ${Common:home_dir}/twosheds library/configparser,,:option,${section:option} library/configparser,,:path,python_dir: ${Frameworks:path}/Python/Versions/${Frameworks:Python} diff --git 
a/Lib/bisect.py b/Lib/bisect.py index 9786fc9d87c5ef..64052f905ce8f7 100644 --- a/Lib/bisect.py +++ b/Lib/bisect.py @@ -1,18 +1,21 @@ """Bisection algorithms.""" -def insort_right(a, x, lo=0, hi=None): +def insort_right(a, x, lo=0, hi=None, *, key=None, reverse=False): """Insert item x in list a, and keep it sorted assuming a is sorted. If x is already in a, insert it to the right of the rightmost x. Optional args lo (default 0) and hi (default len(a)) bound the slice of a to be searched. + + Optional argument key is a function of one argument used to + customize the order. """ - lo = bisect_right(a, x, lo, hi) + lo = bisect_right(a, x, lo, hi, key=key, reverse=reverse) a.insert(lo, x) -def bisect_right(a, x, lo=0, hi=None): +def bisect_right(a, x, lo=0, hi=None, *, key=None, reverse=False): """Return the index where to insert item x in list a, assuming a is sorted. The return value i is such that all e in a[:i] have e <= x, and all e in @@ -21,32 +24,70 @@ def bisect_right(a, x, lo=0, hi=None): Optional args lo (default 0) and hi (default len(a)) bound the slice of a to be searched. + + Optional argument key is a function of one argument used to + customize the order. """ if lo < 0: raise ValueError('lo must be non-negative') if hi is None: hi = len(a) + + if key is None: + if reverse: + while lo < hi: + mid = (lo+hi)//2 + if x > a[mid]: + hi = mid + else: + lo = mid+1 + return lo + + while lo < hi: + mid = (lo+hi)//2 + if x < a[mid]: + hi = mid + else: + lo = mid+1 + return lo + + x_value = key(x) + + if reverse: + while lo < hi: + mid = (lo+hi)//2 + if x_value > key(a[mid]): + hi = mid + else: + lo = mid+1 + return lo + while lo < hi: mid = (lo+hi)//2 - if x < a[mid]: hi = mid - else: lo = mid+1 + if x_value < key(a[mid]): + hi = mid + else: + lo = mid+1 return lo -def insort_left(a, x, lo=0, hi=None): +def insort_left(a, x, lo=0, hi=None, *, key=None, reverse=False): """Insert item x in list a, and keep it sorted assuming a is sorted. 
If x is already in a, insert it to the left of the leftmost x. Optional args lo (default 0) and hi (default len(a)) bound the slice of a to be searched. + + Optional argument key is a function of one argument used to + customize the order. """ - lo = bisect_left(a, x, lo, hi) + lo = bisect_left(a, x, lo, hi, key=key, reverse=reverse) a.insert(lo, x) -def bisect_left(a, x, lo=0, hi=None): +def bisect_left(a, x, lo=0, hi=None, *, key=None, reverse=False): """Return the index where to insert item x in list a, assuming a is sorted. The return value i is such that all e in a[:i] have e < x, and all e in @@ -55,16 +96,51 @@ def bisect_left(a, x, lo=0, hi=None): Optional args lo (default 0) and hi (default len(a)) bound the slice of a to be searched. + + Optional argument key is a function of one argument used to + customize the order. """ if lo < 0: raise ValueError('lo must be non-negative') if hi is None: hi = len(a) + + if key is None: + if reverse: + while lo < hi: + mid = (lo+hi)//2 + if a[mid] > x: + lo = mid+1 + else: + hi = mid + return lo + + while lo < hi: + mid = (lo+hi)//2 + if a[mid] < x: + lo = mid+1 + else: + hi = mid + return lo + + x_value = key(x) + + if reverse: + while lo < hi: + mid = (lo+hi)//2 + if key(a[mid]) > x_value: + lo = mid+1 + else: + hi = mid + return lo + while lo < hi: mid = (lo+hi)//2 - if a[mid] < x: lo = mid+1 - else: hi = mid + if key(a[mid]) < x_value: + lo = mid+1 + else: + hi = mid return lo # Overwrite above definitions with a fast C implementation diff --git a/Lib/test/test_bisect.py b/Lib/test/test_bisect.py index 580a963f627a34..12e0f7de459a3d 100644 --- a/Lib/test/test_bisect.py +++ b/Lib/test/test_bisect.py @@ -199,6 +199,40 @@ def test_keyword_args(self): self.module.insort(a=data, x=25, lo=1, hi=3) self.assertEqual(data, [10, 20, 25, 25, 25, 30, 40, 50]) + for func in (self.module.bisect, self.module.bisect_right, + self.module.bisect_left, self.module.insort_left, + self.module.insort_right, self.module.insort): + + with 
self.assertRaises(TypeError): + func(data, 25, 1, 3, lambda e: e) + + def test_key(self): + data = ["z", "yy", "www"] + self.assertEqual(self.module.bisect_left(data, "xx", key=len), 1) + self.assertEqual(self.module.bisect_right(data, "xx", key=len), 2) + self.assertEqual(self.module.bisect(data, "xx", key=len), 2) + self.module.insort_left(data, "aa", key=len) + self.module.insort_right(data, "bb", key=len) + self.module.insort_right(data, "cc", key=len) + self.assertEqual(data, ['z', 'aa', 'yy', 'bb', 'cc', 'www']) + + # check None is accepted + self.module.insort_right(data, "cc", key=None) + self.module.insort_left(data, "cc", key=None) + self.module.bisect_right(data, "cc", key=None) + self.module.bisect_left(data, "cc", key=None) + + def test_reverse(self): + data = [50, 40, 30, 20, 10] + self.assertEqual(self.module.bisect_left(data, 15, reverse=True), 4) + self.assertEqual(self.module.bisect_right(data, 15, reverse=True), 4) + self.assertEqual(self.module.bisect(data, 15, reverse=True), 4) + self.module.insort_left(data, 15, reverse=True) + self.module.insort_right(data, 15, reverse=True) + self.module.insort(data, 15, reverse=True) + self.assertEqual(data, [50, 40, 30, 20, 15, 15, 15, 10]) + + class TestBisectPython(TestBisect, unittest.TestCase): module = py_bisect diff --git a/Misc/NEWS.d/next/Library/2019-02-07-15-44-12.bpo-4356.i6h86W.rst b/Misc/NEWS.d/next/Library/2019-02-07-15-44-12.bpo-4356.i6h86W.rst new file mode 100644 index 00000000000000..37c33ab4d64bff --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-02-07-15-44-12.bpo-4356.i6h86W.rst @@ -0,0 +1,3 @@ +Functions in the `bisect` module now take a key parameter to specify the +function to call before performing the comparison. Contributed by Rémi +Lapeyre. diff --git a/Modules/_bisectmodule.c b/Modules/_bisectmodule.c index 461a11f5099db3..6ac00eceb70e59 100644 --- a/Modules/_bisectmodule.c +++ b/Modules/_bisectmodule.c @@ -9,67 +9,113 @@ Converted to C by Dmitry Vasiliev (dima at hlabs.spb.ru). 
_Py_IDENTIFIER(insert); static inline Py_ssize_t -internal_bisect_right(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t hi) +internal_bisect_right(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t hi, + PyObject *key, int reverse) { PyObject *litem; + PyObject *item_value = NULL; Py_ssize_t mid; int res; + if (key == NULL) { + item_value = item; + Py_INCREF(item); + } else { + PyObject *arglist = Py_BuildValue("(O)", item); + item_value = PyObject_CallObject(key, arglist); + Py_DECREF(arglist); + if (item_value == NULL) { + return -1; + } + } + if (lo < 0) { PyErr_SetString(PyExc_ValueError, "lo must be non-negative"); - return -1; + goto fail; } if (hi == -1) { hi = PySequence_Size(list); - if (hi < 0) - return -1; + if (hi < 0) { + goto fail; + } } + + int comparator = reverse ? Py_GT : Py_LT; while (lo < hi) { /* The (size_t)cast ensures that the addition and subsequent division are performed as unsigned operations, avoiding difficulties from signed overflow. (See issue 13496.) 
*/ mid = ((size_t)lo + hi) / 2; litem = PySequence_GetItem(list, mid); - if (litem == NULL) - return -1; - res = PyObject_RichCompareBool(item, litem, Py_LT); + if (litem == NULL) { + goto fail; + } + if (key == NULL) { + res = PyObject_RichCompareBool(item_value, litem, comparator); + } + else { + PyObject *arglist = Py_BuildValue("(O)", litem); + PyObject *litem_value = PyObject_CallObject(key, arglist); + Py_DECREF(arglist); + if (litem_value == NULL) { + goto fail; + } + res = PyObject_RichCompareBool(item_value, litem_value, comparator); + Py_DECREF(litem_value); + } + Py_DECREF(litem); - if (res < 0) - return -1; - if (res) + if (res < 0) { + goto fail; + } + if (res) { hi = mid; - else + } + else { lo = mid + 1; + } } + Py_DECREF(item_value); return lo; + +fail: + Py_DECREF(item_value); + return -1; } static PyObject * bisect_right(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item; + PyObject *key = NULL; + int reverse = 0; Py_ssize_t lo = 0; Py_ssize_t hi = -1; Py_ssize_t index; - static char *keywords[] = {"a", "x", "lo", "hi", NULL}; + static char *keywords[] = {"a", "x", "lo", "hi", "key", "reverse", NULL}; if (kw == NULL && PyTuple_GET_SIZE(args) == 2) { list = PyTuple_GET_ITEM(args, 0); item = PyTuple_GET_ITEM(args, 1); } else { - if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|nn:bisect_right", - keywords, &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|nn$Op:bisect_right", + keywords, &list, &item, &lo, &hi, + &key, &reverse)) return NULL; } - index = internal_bisect_right(list, item, lo, hi); - if (index < 0) + if (key == Py_None) { + key = NULL; + } + index = internal_bisect_right(list, item, lo, hi, key, reverse); + if (index < 0) { return NULL; + } return PyLong_FromSsize_t(index); } PyDoc_STRVAR(bisect_right_doc, -"bisect_right(a, x[, lo[, hi]]) -> index\n\ +"bisect_right(a, x[, lo[, hi[, key]]]) -> index\n\ \n\ Return the index where to insert item x in list a, assuming a is sorted.\n\ \n\ @@ -78,29 
+124,39 @@ a[i:] have e > x. So if x already appears in the list, i points just\n\ beyond the rightmost x already there\n\ \n\ Optional args lo (default 0) and hi (default len(a)) bound the\n\ -slice of a to be searched.\n"); +slice of a to be searched.\n\ +\n\ +Optional argument key is a function of one argument used to\n\ +customize the order.\n"); static PyObject * insort_right(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item, *result; + PyObject *key = NULL; + int reverse = 0; Py_ssize_t lo = 0; Py_ssize_t hi = -1; Py_ssize_t index; - static char *keywords[] = {"a", "x", "lo", "hi", NULL}; + static char *keywords[] = {"a", "x", "lo", "hi", "key", "reverse", NULL}; if (kw == NULL && PyTuple_GET_SIZE(args) == 2) { list = PyTuple_GET_ITEM(args, 0); item = PyTuple_GET_ITEM(args, 1); } else { - if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|nn:insort_right", - keywords, &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|nn$Op:insort_right", + keywords, &list, &item, &lo, &hi, + &key, &reverse)) return NULL; } - index = internal_bisect_right(list, item, lo, hi); - if (index < 0) + if (key == Py_None) { + key = NULL; + } + index = internal_bisect_right(list, item, lo, hi, key, reverse); + if (index < 0) { return NULL; + } if (PyList_CheckExact(list)) { if (PyList_Insert(list, index, item) < 0) return NULL; @@ -116,77 +172,125 @@ insort_right(PyObject *self, PyObject *args, PyObject *kw) } PyDoc_STRVAR(insort_right_doc, -"insort_right(a, x[, lo[, hi]])\n\ +"insort_right(a, x[, lo[, hi[, key]]])\n\ \n\ Insert item x in list a, and keep it sorted assuming a is sorted.\n\ \n\ If x is already in a, insert it to the right of the rightmost x.\n\ \n\ Optional args lo (default 0) and hi (default len(a)) bound the\n\ -slice of a to be searched.\n"); +slice of a to be searched.\n\ +\n\ +Optional argument key is a function of one argument used to\n\ +customize the order.\n"); static inline Py_ssize_t -internal_bisect_left(PyObject 
*list, PyObject *item, Py_ssize_t lo, Py_ssize_t hi) +internal_bisect_left(PyObject *list, PyObject *item, Py_ssize_t lo, Py_ssize_t hi, + PyObject *key, int reverse) { PyObject *litem; + PyObject *item_value = NULL; Py_ssize_t mid; int res; + if (key == NULL) { + item_value = item; + Py_INCREF(item); + } else { + PyObject *arglist = Py_BuildValue("(O)", item); + item_value = PyObject_CallObject(key, arglist); + Py_DECREF(arglist); + if (item_value == NULL) { + return -1; + } + } + if (lo < 0) { PyErr_SetString(PyExc_ValueError, "lo must be non-negative"); - return -1; + goto fail; } if (hi == -1) { hi = PySequence_Size(list); - if (hi < 0) - return -1; + if (hi < 0) { + goto fail; + } } + + int comparator = reverse ? Py_GT : Py_LT; while (lo < hi) { /* The (size_t)cast ensures that the addition and subsequent division are performed as unsigned operations, avoiding difficulties from signed overflow. (See issue 13496.) */ mid = ((size_t)lo + hi) / 2; litem = PySequence_GetItem(list, mid); - if (litem == NULL) - return -1; - res = PyObject_RichCompareBool(litem, item, Py_LT); + if (litem == NULL) { + goto fail; + } + if (key == NULL) { + res = PyObject_RichCompareBool(litem, item_value, comparator); + } + else { + PyObject *arglist = Py_BuildValue("(O)", litem); + PyObject *litem_value = PyObject_CallObject(key, arglist); + Py_DECREF(arglist); + if (litem_value == NULL) { + goto fail; + } + res = PyObject_RichCompareBool(litem_value, item_value, comparator); + } + Py_DECREF(litem); - if (res < 0) - return -1; - if (res) + if (res < 0) { + goto fail; + } + if (res) { lo = mid + 1; - else + } + else { hi = mid; + } } + Py_DECREF(item_value); return lo; + +fail: + Py_DECREF(item_value); + return -1; } static PyObject * bisect_left(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item; + PyObject *key = NULL; + int reverse = 0; Py_ssize_t lo = 0; Py_ssize_t hi = -1; Py_ssize_t index; - static char *keywords[] = {"a", "x", "lo", "hi", NULL}; + static char 
*keywords[] = {"a", "x", "lo", "hi", "key", "reverse", NULL}; if (kw == NULL && PyTuple_GET_SIZE(args) == 2) { list = PyTuple_GET_ITEM(args, 0); item = PyTuple_GET_ITEM(args, 1); } else { - if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|nn:bisect_left", - keywords, &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|nn$Op:bisect_left", + keywords, &list, &item, &lo, &hi, + &key, &reverse)) return NULL; } - index = internal_bisect_left(list, item, lo, hi); - if (index < 0) + if (key == Py_None) { + key = NULL; + } + index = internal_bisect_left(list, item, lo, hi, key, reverse); + if (index < 0) { return NULL; + } return PyLong_FromSsize_t(index); } PyDoc_STRVAR(bisect_left_doc, -"bisect_left(a, x[, lo[, hi]]) -> index\n\ +"bisect_left(a, x[, lo[, hi[, key]]]) -> index\n\ \n\ Return the index where to insert item x in list a, assuming a is sorted.\n\ \n\ @@ -195,26 +299,35 @@ a[i:] have e >= x. So if x already appears in the list, i points just\n\ before the leftmost x already there.\n\ \n\ Optional args lo (default 0) and hi (default len(a)) bound the\n\ -slice of a to be searched.\n"); +slice of a to be searched.\n\ +\n\ +Optional argument key is a function of one argument used to\n\ +customize the order.\n"); static PyObject * insort_left(PyObject *self, PyObject *args, PyObject *kw) { PyObject *list, *item, *result; + PyObject *key = NULL; + int reverse = 0; Py_ssize_t lo = 0; Py_ssize_t hi = -1; Py_ssize_t index; - static char *keywords[] = {"a", "x", "lo", "hi", NULL}; + static char *keywords[] = {"a", "x", "lo", "hi", "key", "reverse", NULL}; if (kw == NULL && PyTuple_GET_SIZE(args) == 2) { list = PyTuple_GET_ITEM(args, 0); item = PyTuple_GET_ITEM(args, 1); } else { - if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|nn:insort_left", - keywords, &list, &item, &lo, &hi)) + if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|nn$Op:insort_left", + keywords, &list, &item, &lo, &hi, + &key, &reverse)) return NULL; } - index = 
internal_bisect_left(list, item, lo, hi); + if (key == Py_None) { + key = NULL; + } + index = internal_bisect_left(list, item, lo, hi, key, reverse); if (index < 0) return NULL; if (PyList_CheckExact(list)) { @@ -231,14 +344,17 @@ insort_left(PyObject *self, PyObject *args, PyObject *kw) } PyDoc_STRVAR(insort_left_doc, -"insort_left(a, x[, lo[, hi]])\n\ +"insort_left(a, x[, lo[, hi[, key]]])\n\ \n\ Insert item x in list a, and keep it sorted assuming a is sorted.\n\ \n\ If x is already in a, insert it to the left of the leftmost x.\n\ \n\ Optional args lo (default 0) and hi (default len(a)) bound the\n\ -slice of a to be searched.\n"); +slice of a to be searched.\n\ +\n\ +Optional argument key is a function of one argument used to\n\ +customize the order.\n"); static PyMethodDef bisect_methods[] = { {"bisect_right", (PyCFunction)(void(*)(void))bisect_right, @@ -258,7 +374,10 @@ PyDoc_STRVAR(module_doc, This module provides support for maintaining a list in sorted order without\n\ having to sort the list after each insertion. For long lists of items with\n\ expensive comparison operations, this can be an improvement over the more\n\ -common approach.\n"); +common approach.\n\ +\n\ +Optional argument key is a function of one argument used to\n\ +customize the order.\n"); static struct PyModuleDef _bisectmodule = {