-
Notifications
You must be signed in to change notification settings - Fork 89
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
C++ refactoring: ak.unflatten #1360
Merged
Merged
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
486af4f
ak.unflatten and testing
ioanaif 8e6ae67
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] 85ab046
Refactored unflatten to drop use of kernels
ioanaif f8dac2f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] 1174b6a
Update src/awkward/_v2/operations/structure/ak_unflatten.py
jpivarski File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,204 +1,200 @@ | ||
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE | ||
|
||
import numbers | ||
import awkward as ak | ||
|
||
np = ak.nplike.NumpyMetadata.instance() | ||
|
||
|
||
def unflatten(array, counts, axis=0, highlevel=True, behavior=None):
    """
    Args:
        array: Data to create an array with an additional level from.
        counts (int or array): Number of elements the new level should have.
            If an integer, the new level will be regularly sized; otherwise,
            it will consist of variable-length lists with the given lengths.
        axis (int): The dimension at which this operation is applied. The
            outermost dimension is `0`, followed by `1`, etc., and negative
            values count backward from the innermost: `-1` is the innermost
            dimension, `-2` is the next level up, etc.
        highlevel (bool): If True, return an #ak.Array; otherwise, return
            a low-level #ak.layout.Content subclass.
        behavior (None or dict): Custom #ak.behavior for the output array, if
            high-level.

    Returns an array with an additional level of nesting. This is roughly the
    inverse of #ak.flatten, where `counts` were obtained by #ak.num (both with
    `axis=1`).

    For example,

        >>> original = ak.Array([[0, 1, 2], [], [3, 4], [5], [6, 7, 8, 9]])
        >>> counts = ak.num(original)
        >>> array = ak.flatten(original)
        >>> counts
        <Array [3, 0, 2, 1, 4] type='5 * int64'>
        >>> array
        <Array [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] type='10 * int64'>
        >>> ak.unflatten(array, counts)
        <Array [[0, 1, 2], [], ... [5], [6, 7, 8, 9]] type='5 * var * int64'>

    An inner dimension can be unflattened by setting the `axis` parameter, but
    operations like this constrain the `counts` more tightly.

    For example, we can subdivide an already divided list:

        >>> original = ak.Array([[1, 2, 3, 4], [], [5, 6, 7], [8, 9]])
        >>> print(ak.unflatten(original, [2, 2, 1, 2, 1, 1], axis=1))
        [[[1, 2], [3, 4]], [], [[5], [6, 7]], [[8], [9]]]

    But the counts have to add up to the lengths of those lists. We can't mix
    values from the first `[1, 2, 3, 4]` with values from the next `[5, 6, 7]`.

        >>> print(ak.unflatten(original, [2, 1, 2, 2, 1, 1], axis=1))
        Traceback (most recent call last):
        ...
        ValueError: structure imposed by 'counts' does not fit in the array or partition at axis=1

    Also note that new lists created by this function cannot cross partitions
    (which is only possible at `axis=0`, anyway).

    See also #ak.num and #ak.flatten.
    """
    # The operation name and the caller's arguments are handed to the error
    # context; presumably it uses them to annotate exceptions raised by the
    # implementation (confirm against ak._v2._util.OperationErrorContext).
    with ak._v2._util.OperationErrorContext(
        "ak._v2.unflatten",
        dict(
            array=array,
            counts=counts,
            axis=axis,
            highlevel=highlevel,
            behavior=behavior,
        ),
    ):
        return _impl(array, counts, axis, highlevel, behavior)
|
||
|
||
def _impl(array, counts, axis, highlevel, behavior):
    """
    Implementation of `unflatten`: add one level of list nesting to `array`.

    Args:
        array: Data to convert to a layout and unflatten.
        counts (int or array): A single integer (regular-sized new lists) or
            a one-dimensional array of integers, possibly with missing values
            (variable-length new lists; a missing count yields a missing list).
        axis (int): Dimension at which the new level is inserted.
        highlevel (bool): Forwarded to the final `wrap` call.
        behavior (None or dict): Forwarded to the final `wrap` call.

    Raises:
        ValueError: If `counts` is not an integer or a one-dimensional integer
            array, if an integer `counts` is negative or larger than the data,
            or if the structure described by `counts` does not line up with
            the existing list boundaries of the array.
    """
    nplike = ak.nplike.of(array)

    layout = ak._v2.operations.convert.to_layout(
        array, allow_record=False, allow_other=False
    )

    if isinstance(counts, (numbers.Integral, np.integer)):
        # A scalar count needs no running offsets; `doit` builds a RegularArray.
        current_offsets = None
    else:
        counts = ak._v2.operations.convert.to_layout(
            counts, allow_record=False, allow_other=False
        )

        if counts.is_OptionType:
            # Remember which counts are missing, then treat them as zero-length
            # lists so that the cumulative sum below is well defined.
            mask = counts.mask_as_bool(valid_when=False)
            counts = counts.to_numpy(allow_missing=True)
            counts = ak.nplike.numpy.ma.filled(counts, 0)
        elif counts.is_NumpyType or counts.is_UnknownType:
            counts = counts.to_numpy(allow_missing=False)
            mask = False
        else:
            # Any other layout (e.g. nested lists or records) would otherwise
            # leave `mask` unbound and `counts` unconverted, failing below with
            # an unrelated error.
            raise ak._v2._util.error(
                ValueError(
                    "counts must be an integer or a one-dimensional array of integers"
                )
            )

        if counts.ndim != 1:
            raise ak._v2._util.error(ValueError("counts must be one-dimensional"))
        if not issubclass(counts.dtype.type, np.integer):
            raise ak._v2._util.error(ValueError("counts must be integers"))

        # Offsets are the cumulative sum of counts with a leading zero. They
        # live in a one-element list so that the nested `doit` can replace the
        # remaining offsets as it consumes them.
        current_offsets = [nplike.empty(len(counts) + 1, np.int64)]
        current_offsets[0][0] = 0
        nplike.cumsum(counts, out=current_offsets[0][1:])

    def doit(layout):
        # Wrap `layout` in one new list level, consuming as many of the
        # remaining offsets as are needed to cover exactly len(layout) items.
        if isinstance(counts, (numbers.Integral, np.integer)):
            if counts < 0 or counts > len(layout):
                raise ak._v2._util.error(
                    ValueError("too large counts for array or negative counts")
                )
            out = ak._v2.contents.RegularArray(layout, counts)

        else:
            # Index of the last remaining offset that is <= len(layout); it
            # must equal len(layout) exactly for the counts to fit.
            position = (
                nplike.searchsorted(
                    current_offsets[0], nplike.array([len(layout)]), side="right"
                )[0]
                - 1
            )
            if position >= len(current_offsets[0]) or current_offsets[0][
                position
            ] != len(layout):
                raise ak._v2._util.error(
                    ValueError(
                        "structure imposed by 'counts' does not fit in the array or partition "
                        "at axis={}".format(axis)
                    )
                )

            offsets = current_offsets[0][: position + 1]
            # Keep the unconsumed offsets, rebased so that they start at zero.
            current_offsets[0] = current_offsets[0][position:] - len(layout)

            out = ak._v2.contents.ListOffsetArray(ak._v2.index.Index64(offsets), layout)
            if not isinstance(mask, (bool, np.bool_)):
                # `mask` is an array only when counts had missing values;
                # entries flagged in it become missing lists in the output.
                index = ak._v2.index.Index8(nplike.asarray(mask).astype(np.int8))
                out = ak._v2.contents.ByteMaskedArray(index, out, valid_when=False)

        return out

    if axis == 0 or layout.axis_wrap_if_negative(axis) == 0:
        out = doit(layout)

    else:

        def transform(layout, depth, posaxis):
            # Pack the current layout. This ensures that the `counts` array,
            # which is computed with these layouts applied, aligns with the
            # internal layout to be unflattened (#910)
            layout = layout.packed()

            posaxis = layout.axis_wrap_if_negative(posaxis)
            if posaxis == depth and layout.is_ListType:
                # We are one *above* the level where we want to apply this.
                listoffsetarray = layout.toListOffsetArray64(True)
                outeroffsets = nplike.asarray(listoffsetarray.offsets)

                content = doit(listoffsetarray.content[: outeroffsets[-1]])
                if isinstance(content, ak._v2.contents.ByteMaskedArray):
                    inneroffsets = nplike.asarray(content.content.offsets)
                elif isinstance(content, ak._v2.contents.RegularArray):
                    inneroffsets = nplike.asarray(
                        content.toListOffsetArray64(True).offsets
                    )
                else:
                    inneroffsets = nplike.asarray(content.offsets)

                # Every outer list boundary must coincide with a boundary of
                # the newly created inner lists; otherwise a new list would
                # span two of the original lists.
                positions = (
                    nplike.searchsorted(inneroffsets, outeroffsets, side="right") - 1
                )
                if not nplike.array_equal(inneroffsets[positions], outeroffsets):
                    raise ak._v2._util.error(
                        ValueError(
                            "structure imposed by 'counts' does not fit in the array or partition "
                            "at axis={}".format(axis)
                        )
                    )
                positions[0] = 0

                return ak._v2.contents.ListOffsetArray(
                    ak._v2.index.Index64(positions), content
                )

            else:
                # NOTE(review): the layout is returned unchanged here, without
                # descending into its children, so an axis deeper than this
                # level (or a non-list node at this level) is not unflattened.
                # Confirm whether recursion into child layouts is intended.
                return layout

        out = transform(layout, depth=1, posaxis=axis)

    # With array counts, every offset must have been consumed: only a single
    # rebased zero may remain.
    if current_offsets is not None and not (
        len(current_offsets[0]) == 1 and current_offsets[0][0] == 0
    ):
        raise ak._v2._util.error(
            ValueError(
                "structure imposed by 'counts' does not fit in the array or partition "
                "at axis={}".format(axis)
            )
        )

    return ak._v2._util.wrap(out, behavior, highlevel)
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wrapping this up as a `transform` function that is only ever used once is now superfluous, but not harmful. In v1, it was needed so that the same procedure could be applied to each partition, but now we don't do PartitionedArrays anymore. I also don't see a strong motivation to refactor it.