Skip to content

Commit fd8f27a

Browse files
committed
Initial commit
1 parent 60b976b commit fd8f27a

File tree

5 files changed

+113
-1
lines changed

5 files changed

+113
-1
lines changed

docs/source/python/api/compute.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -532,8 +532,8 @@ Selections
532532
drop_null
533533
filter
534534
inverse_permutation
535-
take
536535
scatter
536+
take
537537

538538
Sorts and Partitions
539539
--------------------
@@ -606,6 +606,7 @@ Compute Options
606606
ExtractRegexSpanOptions
607607
FilterOptions
608608
IndexOptions
609+
InversePermutationOptions
609610
JoinOptions
610611
ListFlattenOptions
611612
ListSliceOptions
@@ -635,6 +636,7 @@ Compute Options
635636
SkewOptions
636637
SliceOptions
637638
SortOptions
639+
ScatterOptions
638640
SplitOptions
639641
SplitPatternOptions
640642
StrftimeOptions

python/pyarrow/_compute.pyx

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1444,6 +1444,62 @@ class RunEndEncodeOptions(_RunEndEncodeOptions):
14441444
self._set_options(run_end_type)
14451445

14461446

1447+
cdef class _InversePermutationOptions(FunctionOptions):
1448+
def _set_options(self, max_index, output_type):
1449+
if output_type is None:
1450+
self.wrapped.reset(new CInversePermutationOptions(max_index))
1451+
else:
1452+
output_ty = ensure_type(output_type)
1453+
self.wrapped.reset(
1454+
new CInversePermutationOptions(max_index,
1455+
pyarrow_unwrap_data_type(output_ty)))
1456+
1457+
1458+
class InversePermutationOptions(_InversePermutationOptions):
1459+
"""
1460+
Options for the `inverse_permutation` function.
1461+
1462+
Parameters
1463+
----------
1464+
max_index : int64, default -1
1465+
The maximum value allowed in the input indices.
1466+
The length of the function’s output will be this value plus 1.
1467+
If negative, this value will be set to the length of the input indices
1468+
minus 1 and the length of the function’s output will be the length
1469+
of the input indices.
1470+
output_type : DataType, default None
1471+
The type of the output inverse permutation.
1472+
If None, the output will be of the same type as the input indices; otherwise it
1473+
must be a signed integer type. An invalid error will be reported if this type
1474+
is not able to store the length of the input indices.
1475+
"""
1476+
1477+
def __init__(self, max_index=-1, output_type=None):
1478+
self._set_options(max_index, output_type)
1479+
1480+
1481+
cdef class _ScatterOptions(FunctionOptions):
1482+
def _set_options(self, max_index):
1483+
self.wrapped.reset(new CScatterOptions(max_index))
1484+
1485+
1486+
class ScatterOptions(_ScatterOptions):
1487+
"""
1488+
Options for the `scatter` function.
1489+
1490+
Parameters
1491+
----------
1492+
max_index : int64, default -1
1493+
The maximum value allowed in the input indices.
1494+
The length of the function’s output will be this value plus 1.
1495+
If negative, this value will be set to the length of the input indices minus 1
1496+
and the length of the function’s output will be the length of the input indices.
1497+
"""
1498+
1499+
def __init__(self, max_index=-1):
1500+
self._set_options(max_index)
1501+
1502+
14471503
cdef class _TakeOptions(FunctionOptions):
14481504
def _set_options(self, boundscheck):
14491505
self.wrapped.reset(new CTakeOptions(boundscheck))

python/pyarrow/compute.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
ExtractRegexSpanOptions,
4444
FilterOptions,
4545
IndexOptions,
46+
InversePermutationOptions,
4647
JoinOptions,
4748
ListSliceOptions,
4849
ListFlattenOptions,
@@ -66,6 +67,7 @@
6667
RoundTemporalOptions,
6768
RoundToMultipleOptions,
6869
ScalarAggregateOptions,
70+
ScatterOptions,
6971
SelectKOptions,
7072
SetLookupOptions,
7173
SkewOptions,

python/pyarrow/includes/libarrow.pxd

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2588,6 +2588,18 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
25882588
CTakeOptions(c_bool boundscheck)
25892589
c_bool boundscheck
25902590

2591+
cdef cppclass CInversePermutationOptions \
2592+
"arrow::compute::InversePermutationOptions"(CFunctionOptions):
2593+
CInversePermutationOptions(int64_t max_index)
2594+
CInversePermutationOptions(int64_t max_index, shared_ptr[CDataType] output_type)
2595+
int64_t max_index
2596+
shared_ptr[CDataType] output_type
2597+
2598+
cdef cppclass CScatterOptions \
2599+
"arrow::compute::ScatterOptions"(CFunctionOptions):
2600+
CScatterOptions(int64_t max_index)
2601+
int64_t max_index
2602+
25912603
cdef cppclass CStrptimeOptions \
25922604
"arrow::compute::StrptimeOptions"(CFunctionOptions):
25932605
CStrptimeOptions(c_string format, TimeUnit unit, c_bool raise_error)

python/pyarrow/tests/test_compute.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ def test_option_class_equality(request):
202202
pc.WeekOptions(week_starts_monday=True, count_from_zero=False,
203203
first_week_is_fully_in_year=False),
204204
pc.ZeroFillOptions(4, "0"),
205+
pc.InversePermutationOptions(-1, output_type=pa.int32()),
205206
]
206207
# Timezone database might not be installed on Windows or Emscripten
207208
if request.config.pyarrow.is_enabled["timezone_data"]:
@@ -1590,6 +1591,45 @@ def test_filter_null_type():
15901591
assert len(table.filter(mask).column(0)) == 5
15911592

15921593

1594+
def test_inverse_permutation():
1595+
arr0 = pa.array([], type=pa.int32())
1596+
arr = pa.chunked_array([
1597+
arr0, [9, 7, 5, 3, 1], [0], [2, 4, 6], [8], arr0,
1598+
])
1599+
result = pc.inverse_permutation(arr)
1600+
print(result)
1601+
expected = pa.chunked_array([[5, 4, 6, 3, 7, 2, 8, 1, 9, 0]], type=pa.int32())
1602+
assert result.equals(expected)
1603+
1604+
# `inverse_permutation` kernel currently does not accept options
1605+
options = pc.InversePermutationOptions(max_index=4, output_type=pa.int64())
1606+
print(options)
1607+
with pytest.raises(TypeError, match="an unexpected keyword argument \'options\'"):
1608+
pc.inverse_permutation(arr, options=options)
1609+
1610+
# `inverse_permutation` kernel currently won't accept max_index
1611+
with pytest.raises(TypeError, match="an unexpected keyword argument \'max_index\'"):
1612+
pc.inverse_permutation(arr, max_index=4)
1613+
1614+
1615+
def test_scatter():
1616+
values = pa.array([True, False, True, True, False, False, True, True, True, False])
1617+
indices = pa.array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])
1618+
expected = pa.array([False, True, True, True, False,
1619+
False, True, True, False, True])
1620+
result = pc.scatter(values, indices)
1621+
assert result.equals(expected)
1622+
1623+
# `scatter` kernel currently does not accept options
1624+
options = pc.ScatterOptions(max_index=4)
1625+
with pytest.raises(TypeError, match="unexpected keyword argument \'options\'"):
1626+
pc.scatter(values, indices, options=options)
1627+
1628+
# `scatter` kernel currently won't accept max_index
1629+
with pytest.raises(TypeError, match="unexpected keyword argument \'max_index\'"):
1630+
pc.scatter(values, indices, max_index=4)
1631+
1632+
15931633
@pytest.mark.parametrize("typ", ["array", "chunked_array"])
15941634
def test_compare_array(typ):
15951635
if typ == "array":

0 commit comments

Comments
 (0)