gh-120496: Use CAS approach for rangeiter_next #120534

Closed · wants to merge 8 commits
9 changes: 9 additions & 0 deletions Include/cpython/pyatomic.h
@@ -105,6 +105,9 @@ _Py_atomic_add_int32(int32_t *obj, int32_t value);
static inline int64_t
_Py_atomic_add_int64(int64_t *obj, int64_t value);

static inline long
_Py_atomic_add_long(long *obj, long value);

static inline intptr_t
_Py_atomic_add_intptr(intptr_t *obj, intptr_t value);

@@ -155,6 +158,9 @@ _Py_atomic_compare_exchange_int32(int32_t *obj, int32_t *expected, int32_t desired);
static inline int
_Py_atomic_compare_exchange_int64(int64_t *obj, int64_t *expected, int64_t desired);

static inline int
_Py_atomic_compare_exchange_long(long *obj, long *expected, long desired);

static inline int
_Py_atomic_compare_exchange_intptr(intptr_t *obj, intptr_t *expected, intptr_t desired);

@@ -348,6 +354,9 @@ _Py_atomic_load_uint32_relaxed(const uint32_t *obj);
static inline uint64_t
_Py_atomic_load_uint64_relaxed(const uint64_t *obj);

static inline long
_Py_atomic_load_long_relaxed(const long *obj);

static inline uintptr_t
_Py_atomic_load_uintptr_relaxed(const uintptr_t *obj);

13 changes: 13 additions & 0 deletions Include/cpython/pyatomic_gcc.h
@@ -30,6 +30,10 @@ static inline int64_t
_Py_atomic_add_int64(int64_t *obj, int64_t value)
{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); }

static inline long
_Py_atomic_add_long(long *obj, long value)
{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); }

static inline intptr_t
_Py_atomic_add_intptr(intptr_t *obj, intptr_t value)
{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); }
@@ -90,6 +94,11 @@ _Py_atomic_compare_exchange_int64(int64_t *obj, int64_t *expected, int64_t desired)
{ return __atomic_compare_exchange_n(obj, expected, desired, 0,
                                     __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }

static inline int
_Py_atomic_compare_exchange_long(long *obj, long *expected, long desired)
{ return __atomic_compare_exchange_n(obj, expected, desired, 0,
                                     __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }

static inline int
_Py_atomic_compare_exchange_intptr(intptr_t *obj, intptr_t *expected, intptr_t desired)
{ return __atomic_compare_exchange_n(obj, expected, desired, 0,
                                     __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }

@@ -342,6 +351,10 @@ static inline uint64_t
_Py_atomic_load_uint64_relaxed(const uint64_t *obj)
{ return __atomic_load_n(obj, __ATOMIC_RELAXED); }

static inline long
_Py_atomic_load_long_relaxed(const long *obj)
{ return __atomic_load_n(obj, __ATOMIC_RELAXED); }

static inline uintptr_t
_Py_atomic_load_uintptr_relaxed(const uintptr_t *obj)
{ return __atomic_load_n(obj, __ATOMIC_RELAXED); }
38 changes: 38 additions & 0 deletions Include/cpython/pyatomic_msc.h
@@ -59,6 +59,23 @@ _Py_atomic_add_int64(int64_t *obj, int64_t value)
#endif
}

static inline long
_Py_atomic_add_long(long *obj, long value)
{
#if defined(_M_X64) || defined(_M_ARM64)
    _Py_atomic_ASSERT_ARG_TYPE(long);
    return (long)_InterlockedExchangeAdd((volatile long *)obj, (long)value);
#else
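    // No interlocked fetch-add is issued on this target; emulate it
    // with a compare-exchange retry loop.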
    long old_value = _Py_atomic_load_long_relaxed(obj);
    for (;;) {
        long new_value = old_value + value;
        if (_Py_atomic_compare_exchange_long(obj, &old_value, new_value)) {
            return old_value;
        }
    }
#endif
}


static inline uint8_t
_Py_atomic_add_uint8(uint8_t *obj, uint8_t value)
@@ -187,6 +204,21 @@ _Py_atomic_compare_exchange_int64(int64_t *obj, int64_t *expected, int64_t value)
    return 0;
}

static inline int
_Py_atomic_compare_exchange_long(long *obj, long *expected, long value)
{
    _Py_atomic_ASSERT_ARG_TYPE(long);
    long initial = (long)_InterlockedCompareExchange(
                             (volatile long *)obj,
                             (long)value,
                             (long)*expected);
    if (initial == *expected) {
        return 1;
    }
    *expected = initial;
    return 0;
}

static inline int
_Py_atomic_compare_exchange_ptr(void *obj, void *expected, void *value)
{
@@ -688,6 +720,12 @@ _Py_atomic_load_uint64_relaxed(const uint64_t *obj)
    return *(volatile uint64_t *)obj;
}

static inline long
_Py_atomic_load_long_relaxed(const long *obj)
{
    return *(volatile long *)obj;
}

static inline uintptr_t
_Py_atomic_load_uintptr_relaxed(const uintptr_t *obj)
{
24 changes: 24 additions & 0 deletions Include/cpython/pyatomic_std.h
@@ -55,6 +55,13 @@ _Py_atomic_add_int64(int64_t *obj, int64_t value)
    return atomic_fetch_add((_Atomic(int64_t)*)obj, value);
}

static inline long
_Py_atomic_add_long(long *obj, long value)
{
    _Py_USING_STD;
    return atomic_fetch_add((_Atomic(long)*)obj, value);
}

static inline intptr_t
_Py_atomic_add_intptr(intptr_t *obj, intptr_t value)
{
@@ -154,6 +161,14 @@ _Py_atomic_compare_exchange_int64(int64_t *obj, int64_t *expected, int64_t desired)
                                          expected, desired);
}

static inline int
_Py_atomic_compare_exchange_long(long *obj, long *expected, long desired)
{
    _Py_USING_STD;
    return atomic_compare_exchange_strong((_Atomic(long)*)obj,
                                          expected, desired);
}

static inline int
_Py_atomic_compare_exchange_intptr(intptr_t *obj, intptr_t *expected, intptr_t desired)
{
@@ -587,6 +602,15 @@ _Py_atomic_load_uint64_relaxed(const uint64_t *obj)
                                memory_order_relaxed);
}

static inline long
_Py_atomic_load_long_relaxed(const long *obj)
{
    _Py_USING_STD;
    return atomic_load_explicit((const _Atomic(long)*)obj,
                                memory_order_relaxed);
}


static inline uintptr_t
_Py_atomic_load_uintptr_relaxed(const uintptr_t *obj)
{
16 changes: 16 additions & 0 deletions Objects/rangeobject.c
@@ -816,13 +816,29 @@ PyTypeObject PyRange_Type = {
static PyObject *
rangeiter_next(_PyRangeIterObject *r)
{
#ifdef Py_GIL_DISABLED
    // r->step is a read-only attribute, so we can assume it does not change.
    long step = r->step;
    do {
        long len = _Py_atomic_load_long_relaxed(&r->len);
        if (len <= 0) {
            return NULL;
        }
        long result = _Py_atomic_load_long_relaxed(&r->start);
eendebakpt (Contributor) commented on Jun 15, 2024:

What if we have a range object with len=1 and many threads are at this point simultaneously? The check on len has already been done, so the threads each increment start and we end up with a value for result larger than the end of the range.
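
A minimal single-threaded replay of that interleaving makes the overshoot concrete. The names below are hypothetical stand-ins, not CPython code; the scenario is range(0, 1), i.e. start=0, step=1, len=1:

#include <assert.h>
#include <stdio.h>

static long start = 0, step = 1, len = 1;   /* range(0, 1) */

/* Simplified stand-in for _Py_atomic_compare_exchange_long; there is
 * no real concurrency here, the interleaving is replayed by hand. */
static int cas_long(long *obj, long *expected, long desired)
{
    if (*obj == *expected) { *obj = desired; return 1; }
    *expected = *obj;
    return 0;
}

int main(void)
{
    /* Thread A passes the len check and CASes start from 0 to 1,
     * but is preempted before it decrements len. */
    assert(len > 0);
    long result_a = start;                        /* 0 */
    cas_long(&start, &result_a, result_a + step); /* succeeds */

    /* Thread B also passes the len check (len is still 1), loads
     * start == 1, and CASes it to 2. */
    assert(len > 0);
    long result_b = start;                        /* 1: past the end */
    cas_long(&start, &result_b, result_b + step); /* succeeds */

    /* Both threads decrement len afterwards. */
    len -= 1;   /* B */
    len -= 1;   /* A */

    /* range(0, 1) must only ever yield 0, yet B observed 1. */
    printf("A: %ld, B: %ld, len: %ld\n", result_a, result_b, len);
    return 0;
}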

Member (Author) replied:

Maybe it will fail at _Py_atomic_compare_exchange_long?

Member (Author) replied:

Ah, yes. Now I understand what you pointed out.

Member (Author) replied:

There is a slight timing window there. So yes, let's just use the critical section approach that @Fidget-Spinner also preferred.

Member (Author) replied:

To guard against the overshoot issue, we could validate result by comparing it against the end value. But let's just use the critical section approach.
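
For reference, that validation idea might look like the sketch below. It is hypothetical, not the PR's code: it assumes step > 0 and an immutable stop bound, which the iterator (holding only start, step, and len) does not actually store:

// Hypothetical sketch of validating against the end of the range.
long result = _Py_atomic_load_long_relaxed(&r->start);
if (_Py_atomic_compare_exchange_long(&r->start, &result, result + step)) {
    if (result >= stop) {
        // Another thread raced us past the end; yield nothing.
        return NULL;
    }
    _Py_atomic_add_long(&r->len, -1);
    return PyLong_FromLong(result);
}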

        if (_Py_atomic_compare_exchange_long(&r->start, &result, result + step)) {
            _Py_atomic_add_long(&r->len, -1);
            return PyLong_FromLong(result);
        }
    } while (1);
#else
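    // With the GIL held, no other thread can interleave with this
    // read-modify-write sequence, so plain non-atomic updates suffice.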
    if (r->len > 0) {
        long result = r->start;
        r->start = result + r->step;
        r->len--;
        return PyLong_FromLong(result);
    }
    return NULL;
#endif
}

static PyObject *