Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

C++ refactoring: Content classes #896

Merged
merged 36 commits into from
Jun 16, 2021
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
386f854
first iteration of index.py refactoring
ioanaif May 28, 2021
3d9aced
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 28, 2021
08840ed
First counter-edits.
jpivarski May 28, 2021
d646421
identifier refactoring
ioanaif May 31, 2021
b68528c
self._data can be None
jpivarski May 31, 2021
d31aaf0
Merge branch 'ioanaif/index-and-identifier-refactoring' of https://gi…
jpivarski May 31, 2021
b0701ce
tweaks
jpivarski May 31, 2021
b9d045f
identifier changes
ioanaif May 31, 2021
3a431f3
more tweaks
jpivarski May 31, 2021
4fa58a0
adding missing functions + testing
ioanaif Jun 3, 2021
944bdda
First commit for Content classes refactoring
ioanaif Jun 3, 2021
eb54a77
Merge to match 'main' branch.
jpivarski Jun 3, 2021
f6dac85
Added tests for test-driven development.
jpivarski Jun 3, 2021
4664963
Content classes refactoring + testing
ioanaif Jun 9, 2021
f4a2126
Merge branch 'main' into ioanaif/content-classes-refactoring
jpivarski Jun 10, 2021
486c2bc
Test the Python 2.7 issue.
jpivarski Jun 10, 2021
99086b8
Temporarily focus on Linux Python 2.7 only.
jpivarski Jun 10, 2021
98c3ddb
Fixed Python 2.7 tests.
jpivarski Jun 10, 2021
ce06542
Integration of latest comments and __repr__
ioanaif Jun 10, 2021
085c227
Fixing conflicts
ioanaif Jun 10, 2021
d6fba87
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 10, 2021
4e5eaec
Recommit of previous commited Python 2.7 fixes
ioanaif Jun 11, 2021
8764241
Merge branch 'ioanaif/content-classes-refactoring' of https://github.…
ioanaif Jun 11, 2021
c3539bd
Minor tweaks
ioanaif Jun 11, 2021
f606cf5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 11, 2021
a0488f9
Minor tweaks
ioanaif Jun 11, 2021
bedba0d
Merge branch 'ioanaif/content-classes-refactoring' of https://github.…
ioanaif Jun 11, 2021
ebf7719
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 11, 2021
9c4f4a3
Fixing duplication errors
ioanaif Jun 14, 2021
c1df703
Merge branch 'ioanaif/content-classes-refactoring' of https://github.…
ioanaif Jun 14, 2021
03b069f
Added NotImplementedError stubs for unhandled slice types, to disting…
jpivarski Jun 15, 2021
e59f48f
Rename Index and Identifier '_T' to 'dtype' (because that's what it i…
jpivarski Jun 15, 2021
57630f0
More concise and uniform repr strings.
jpivarski Jun 16, 2021
d1244fc
Record implementation wasn't right.
jpivarski Jun 16, 2021
b8a1db3
Converted all assertions into exceptions with messages.
jpivarski Jun 16, 2021
f447182
Merge branch 'main' into ioanaif/content-classes-refactoring
jpivarski Jun 16, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/awkward/_v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@

import awkward._v2.index # noqa: F401
import awkward._v2.identifier # noqa: F401
import awkward._v2.array # noqa: F401
import awkward._v2.content # noqa: F401
import awkward._v2.record # noqa: F401
20 changes: 20 additions & 0 deletions src/awkward/_v2/array/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

from __future__ import absolute_import

import awkward._v2.content # noqa: F401
import awkward._v2.index # noqa: F401
import awkward._v2.record # noqa: F401

import awkward._v2.array.emptyarray # noqa: F401
import awkward._v2.array.numpyarray # noqa: F401
import awkward._v2.array.regulararray # noqa: F401
import awkward._v2.array.listarray # noqa: F401
import awkward._v2.array.listoffsetarray # noqa: F401
import awkward._v2.array.recordarray # noqa: F401
import awkward._v2.array.indexedarray # noqa: F401
import awkward._v2.array.indexedoptionarray # noqa: F401
import awkward._v2.array.bytemaskedarray # noqa: F401
import awkward._v2.array.bitmaskedarray # noqa: F401
import awkward._v2.array.unmaskedarray # noqa: F401
import awkward._v2.array.unionarray # noqa: F401
74 changes: 74 additions & 0 deletions src/awkward/_v2/array/bitmaskedarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

from __future__ import absolute_import

import numbers

from awkward._v2.content import Content
from awkward._v2.index import Index
from awkward._v2.array.bytemaskedarray import ByteMaskedArray

import numpy as np


class BitMaskedArray(Content):
    """Option-type array whose validity flags are packed eight per mask byte.

    Element ``i`` is valid when the bit selected from ``mask[i // 8]`` equals
    ``valid_when``. With ``lsb_order=True`` the bit for element ``i`` is
    ``1 << (i % 8)``; otherwise it is ``128 >> (i % 8)``.
    """

    def __init__(self, mask, content, valid_when, length, lsb_order):
        """Validate and store the components of the array.

        Args:
            mask: Index with dtype uint8 holding the packed validity bits.
            content: Content holding the (possibly masked) elements.
            valid_when: bool; the bit value that marks an element as valid.
            length: non-negative integer; the number of elements.
            lsb_order: bool; True to read bits least-significant first.

        Raises:
            TypeError: if an argument has the wrong type or dtype.
            ValueError: if ``length`` is negative, or larger than the number
                of bits in ``mask`` or the number of items in ``content``.
        """
        # Explicit exceptions instead of assert: asserts vanish under -O.
        if not (isinstance(mask, Index) and mask._T == np.uint8):
            raise TypeError(
                "{0} 'mask' must be an Index with dtype=uint8, not {1}".format(
                    type(self).__name__, repr(mask)
                )
            )
        if not isinstance(content, Content):
            raise TypeError(
                "{0} 'content' must be a Content subtype, not {1}".format(
                    type(self).__name__, repr(content)
                )
            )
        if not isinstance(valid_when, bool):
            raise TypeError(
                "{0} 'valid_when' must be boolean, not {1}".format(
                    type(self).__name__, repr(valid_when)
                )
            )
        if not isinstance(length, numbers.Integral):
            raise TypeError(
                "{0} 'length' must be an integer, not {1}".format(
                    type(self).__name__, repr(length)
                )
            )
        if length < 0:
            raise ValueError(
                "{0} 'length' must be non-negative, not {1}".format(
                    type(self).__name__, repr(length)
                )
            )
        if not isinstance(lsb_order, bool):
            raise TypeError(
                "{0} 'lsb_order' must be boolean, not {1}".format(
                    type(self).__name__, repr(lsb_order)
                )
            )
        # Each mask byte covers eight elements, so the mask is compared in
        # bits (not bytes) and the content in items, both against 'length'.
        if length > len(mask) * 8:
            raise ValueError(
                "{0} 'length' ({1}) must be <= len(mask) * 8 ({2})".format(
                    type(self).__name__, length, len(mask) * 8
                )
            )
        if length > len(content):
            raise ValueError(
                "{0} 'length' ({1}) must be <= len(content) ({2})".format(
                    type(self).__name__, length, len(content)
                )
            )

        self.mask = mask
        self.content = content
        self.valid_when = valid_when
        self.length = length
        self.lsb_order = lsb_order

    def __len__(self):
        return self.length

    def _getitem_at(self, where):
        """Return the element at integer position ``where``, or None if masked.

        Negative positions count from the end.

        Raises:
            IndexError: if ``where`` is outside ``[-len(self), len(self))``.
        """
        if where < 0:
            where += len(self)
        if 0 > where or where >= len(self):
            raise IndexError("array index out of bounds")

        byte = self.mask[where // 8]
        if self.lsb_order:
            bit = bool(byte & (1 << (where % 8)))
        else:
            bit = bool(byte & (128 >> (where % 8)))

        if bit == self.valid_when:
            return self.content[where]
        else:
            return None

    def _getitem_range(self, where):
        """Return the elements selected by slice ``where`` as a ByteMaskedArray.

        Only the start and stop of the slice are applied here; the step is
        not used.
        """
        # In general, slices must convert BitMaskedArray to ByteMaskedArray.
        # The unpacked 0/1 bytes are viewed as int8 because ByteMaskedArray
        # only accepts an int8 mask (a bool mask would be rejected).
        # NOTE(review): assumes numpy can read self.mask (an Index) as a
        # uint8 array -- confirm against the Index implementation.
        bytemask = np.unpackbits(
            self.mask, bitorder=("little" if self.lsb_order else "big")
        ).view(np.int8)
        start, stop, _ = where.indices(len(self))
        return ByteMaskedArray(
            Index(bytemask[start:stop]),
            self.content[start:stop],
            valid_when=self.valid_when,
        )

    def _getitem_field(self, where):
        """Project field ``where`` out of the content, keeping the same mask."""
        return BitMaskedArray(
            self.mask,
            self.content[where],
            valid_when=self.valid_when,
            length=self.length,
            lsb_order=self.lsb_order,
        )

    def _getitem_fields(self, where):
        """Project the fields ``where`` out of the content, keeping the same mask."""
        return BitMaskedArray(
            self.mask,
            self.content[where],
            valid_when=self.valid_when,
            length=self.length,
            lsb_order=self.lsb_order,
        )
62 changes: 62 additions & 0 deletions src/awkward/_v2/array/bytemaskedarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

from __future__ import absolute_import

import awkward as ak
from awkward._v2.content import Content
from awkward._v2.index import Index

np = ak.nplike.NumpyMetadata.instance()


class ByteMaskedArray(Content):
    """Option-type array with one int8 validity flag per element.

    Element ``i`` is valid when ``mask[i] == valid_when``; otherwise it is
    reported as None.
    """

    def __init__(self, mask, content, valid_when):
        """Validate and store the components of the array.

        Args:
            mask: Index with dtype int8; one flag per element.
            content: Content holding the (possibly masked) elements.
            valid_when: bool; the mask value that marks an element as valid.

        Raises:
            TypeError: if an argument has the wrong type or dtype.
            ValueError: if ``mask`` is longer than ``content``.
        """
        # Explicit exceptions instead of assert: asserts vanish under -O.
        if not (isinstance(mask, Index) and mask._T == np.int8):
            raise TypeError(
                "{0} 'mask' must be an Index with dtype=int8, not {1}".format(
                    type(self).__name__, repr(mask)
                )
            )
        if not isinstance(content, Content):
            raise TypeError(
                "{0} 'content' must be a Content subtype, not {1}".format(
                    type(self).__name__, repr(content)
                )
            )
        if not isinstance(valid_when, bool):
            raise TypeError(
                "{0} 'valid_when' must be boolean, not {1}".format(
                    type(self).__name__, repr(valid_when)
                )
            )
        if not len(mask) <= len(content):
            raise ValueError(
                "{0} len(mask) ({1}) must be <= len(content) ({2})".format(
                    type(self).__name__, len(mask), len(content)
                )
            )

        self.mask = mask
        self.content = content
        self.valid_when = valid_when

    def __len__(self):
        # The mask, not the content, determines the length.
        return len(self.mask)

    def _getitem_at(self, where):
        """Return the element at integer position ``where``, or None if masked.

        Negative positions count from the end.

        Raises:
            IndexError: if ``where`` is outside ``[-len(self), len(self))``.
        """
        if where < 0:
            where += len(self)
        if 0 > where or where >= len(self):
            raise IndexError("array index out of bounds")
        if self.mask[where] == self.valid_when:
            return self.content[where]
        else:
            return None

    def _getitem_range(self, where):
        """Return the elements selected by slice ``where``.

        Only the start and stop of the slice are applied here; the step is
        not used.
        """
        start, stop, _ = where.indices(len(self))
        return ByteMaskedArray(
            Index(self.mask[start:stop]),
            self.content[start:stop],
            valid_when=self.valid_when,
        )

    def _getitem_field(self, where):
        """Project field ``where`` out of the content, keeping the same mask."""
        return ByteMaskedArray(
            self.mask, self.content[where], valid_when=self.valid_when
        )

    def _getitem_fields(self, where):
        """Project the fields ``where`` out of the content, keeping the same mask."""
        return ByteMaskedArray(
            self.mask, self.content[where], valid_when=self.valid_when
        )
28 changes: 28 additions & 0 deletions src/awkward/_v2/array/emptyarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

from __future__ import absolute_import

from awkward._v2.content import Content


class EmptyArray(Content):
    """Array that always has length zero and therefore no items or fields."""

    def __init__(self):
        """There is no state to initialize."""

    def __len__(self):
        return 0

    def __repr__(self):
        return "EmptyArray()"

    def _getitem_at(self, where):
        """Always raise IndexError: no integer position exists."""
        message = "array of type Empty has no index " + repr(where)
        raise IndexError(message)

    def _getitem_range(self, where):
        """Return a new EmptyArray regardless of the slice given."""
        return EmptyArray()

    def _getitem_field(self, where):
        """Always raise IndexError: there are no fields."""
        message = "field " + repr(where) + " not found"
        raise IndexError(message)

    def _getitem_fields(self, where):
        """Always raise IndexError: there are no fields."""
        message = "fields " + repr(where) + " not found"
        raise IndexError(message)
39 changes: 39 additions & 0 deletions src/awkward/_v2/array/indexedarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

from __future__ import absolute_import

import awkward as ak
from awkward._v2.content import Content
from awkward._v2.index import Index

np = ak.nplike.NumpyMetadata.instance()


class IndexedArray(Content):
    """Array whose element ``i`` is ``content[index[i]]``."""

    def __init__(self, index, content):
        """Validate and store the components of the array.

        Args:
            index: Index with dtype int32, uint32 or int64; positions into
                ``content``.
            content: Content being indexed.

        Raises:
            TypeError: if an argument has the wrong type or dtype.
        """
        # Explicit exceptions instead of assert: asserts vanish under -O.
        if not (
            isinstance(index, Index)
            and index._T in (np.int32, np.uint32, np.int64)
        ):
            raise TypeError(
                "{0} 'index' must be an Index with dtype in (int32, uint32, "
                "int64), not {1}".format(type(self).__name__, repr(index))
            )
        if not isinstance(content, Content):
            raise TypeError(
                "{0} 'content' must be a Content subtype, not {1}".format(
                    type(self).__name__, repr(content)
                )
            )

        self.index = index
        self.content = content

    def __len__(self):
        # The index, not the content, determines the length.
        return len(self.index)

    def __repr__(self):
        return "IndexedArray(" + repr(self.index) + ", " + repr(self.content) + ")"

    def _getitem_at(self, where):
        """Return ``content[index[where]]``.

        Negative positions count from the end.

        Raises:
            IndexError: if ``where`` is outside ``[-len(self), len(self))``.
        """
        if where < 0:
            where += len(self)
        if 0 > where or where >= len(self):
            raise IndexError("array index out of bounds")
        return self.content[self.index[where]]

    def _getitem_range(self, where):
        """Return the elements selected by slice ``where``, sharing the content.

        Only the start and stop of the slice are applied here; the step is
        not used.
        """
        # Normalize None/negative/out-of-range bounds the same way the other
        # array classes do, rather than passing raw slice attributes through.
        start, stop, _ = where.indices(len(self))
        return IndexedArray(Index(self.index[start:stop]), self.content)

    def _getitem_field(self, where):
        """Project field ``where`` out of the content, keeping the same index."""
        return IndexedArray(self.index, self.content[where])

    def _getitem_fields(self, where):
        """Project the fields ``where`` out of the content, keeping the same index."""
        return IndexedArray(self.index, self.content[where])
46 changes: 46 additions & 0 deletions src/awkward/_v2/array/indexedoptionarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

from __future__ import absolute_import

import awkward as ak
from awkward._v2.content import Content
from awkward._v2.index import Index

np = ak.nplike.NumpyMetadata.instance()


class IndexedOptionArray(Content):
    """Option-type array: element ``i`` is ``content[index[i]]``, or None
    when ``index[i]`` is negative.
    """

    def __init__(self, index, content):
        """Validate and store the components of the array.

        Args:
            index: Index with dtype int32 or int64; positions into
                ``content``, with negative values marking missing elements.
            content: Content being indexed.

        Raises:
            TypeError: if an argument has the wrong type or dtype.
        """
        # Explicit exceptions instead of assert: asserts vanish under -O.
        if not (isinstance(index, Index) and index._T in (np.int32, np.int64)):
            raise TypeError(
                "{0} 'index' must be an Index with dtype in (int32, int64), "
                "not {1}".format(type(self).__name__, repr(index))
            )
        if not isinstance(content, Content):
            raise TypeError(
                "{0} 'content' must be a Content subtype, not {1}".format(
                    type(self).__name__, repr(content)
                )
            )

        self.index = index
        self.content = content

    def __len__(self):
        # The index, not the content, determines the length.
        return len(self.index)

    def __repr__(self):
        return (
            "IndexedOptionArray(" + repr(self.index) + ", " + repr(self.content) + ")"
        )

    def _getitem_at(self, where):
        """Return the element at integer position ``where``, or None if missing.

        Negative positions count from the end.

        Raises:
            IndexError: if ``where`` is outside ``[-len(self), len(self))``.
        """
        if where < 0:
            where += len(self)
        if 0 > where or where >= len(self):
            raise IndexError("array index out of bounds")

        position = self.index[where]
        if position < 0:
            return None
        else:
            return self.content[position]

    def _getitem_range(self, where):
        """Return the elements selected by slice ``where``, sharing the content.

        Only the start and stop of the slice are applied here; the step is
        not used.
        """
        # Normalize None/negative/out-of-range bounds the same way the other
        # array classes do, rather than passing raw slice attributes through.
        start, stop, _ = where.indices(len(self))
        return IndexedOptionArray(Index(self.index[start:stop]), self.content)

    def _getitem_field(self, where):
        """Project field ``where`` out of the content, keeping the same index."""
        return IndexedOptionArray(self.index, self.content[where])

    def _getitem_fields(self, where):
        """Project the fields ``where`` out of the content, keeping the same index."""
        return IndexedOptionArray(self.index, self.content[where])
57 changes: 57 additions & 0 deletions src/awkward/_v2/array/listarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

from __future__ import absolute_import

import awkward as ak
from awkward._v2.content import Content
from awkward._v2.index import Index

np = ak.nplike.NumpyMetadata.instance()


class ListArray(Content):
    """Array of lists: element ``i`` is ``content[starts[i]:stops[i]]``."""

    def __init__(self, starts, stops, content):
        """Validate and store the components of the array.

        Args:
            starts: Index with dtype int32, uint32 or int64; where each list
                begins in ``content``.
            stops: Index with the same dtype as ``starts``; where each list
                ends in ``content``.
            content: Content holding the list items.

        Raises:
            TypeError: if an argument has the wrong type or dtype.
            ValueError: if ``stops`` is shorter than ``starts``.
        """
        # Explicit exceptions instead of assert: asserts vanish under -O.
        if not (
            isinstance(starts, Index)
            and starts._T in (np.int32, np.uint32, np.int64)
        ):
            raise TypeError(
                "{0} 'starts' must be an Index with dtype in (int32, uint32, "
                "int64), not {1}".format(type(self).__name__, repr(starts))
            )
        if not (isinstance(stops, Index) and starts._T == stops._T):
            raise TypeError(
                "{0} 'stops' must be an Index with the same dtype as 'starts', "
                "not {1}".format(type(self).__name__, repr(stops))
            )
        if not isinstance(content, Content):
            raise TypeError(
                "{0} 'content' must be a Content subtype, not {1}".format(
                    type(self).__name__, repr(content)
                )
            )
        # Usually equal, but extra trailing stops are tolerated.
        if not len(stops) >= len(starts):
            raise ValueError(
                "{0} len(stops) ({1}) must be >= len(starts) ({2})".format(
                    type(self).__name__, len(stops), len(starts)
                )
            )

        self.starts = starts
        self.stops = stops
        self.content = content

    def __len__(self):
        # The starts determine the length; stops may be longer.
        return len(self.starts)

    def __repr__(self):
        return (
            "ListArray("
            + repr(self.starts)
            + ", "
            + repr(self.stops)
            + ", "
            + repr(self.content)
            + ")"
        )

    def _getitem_at(self, where):
        """Return the list at integer position ``where`` as a slice of content.

        Negative positions count from the end.

        Raises:
            IndexError: if ``where`` is outside ``[-len(self), len(self))``.
        """
        if where < 0:
            where += len(self)
        if 0 > where or where >= len(self):
            raise IndexError("array index out of bounds")
        return self.content[self.starts[where] : self.stops[where]]

    def _getitem_range(self, where):
        """Return the lists selected by slice ``where``, sharing the content.

        Only the start and stop of the slice are applied here; the step is
        not used.
        """
        start, stop, _ = where.indices(len(self))
        starts = Index(self.starts[start:stop])
        stops = Index(self.stops[start:stop])
        return ListArray(starts, stops, self.content)

    def _getitem_field(self, where):
        """Project field ``where`` out of the content, keeping starts and stops."""
        return ListArray(self.starts, self.stops, self.content[where])

    def _getitem_fields(self, where):
        """Project the fields ``where`` out of the content, keeping starts and stops."""
        return ListArray(self.starts, self.stops, self.content[where])
48 changes: 48 additions & 0 deletions src/awkward/_v2/array/listoffsetarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE

from __future__ import absolute_import

import awkward as ak
from awkward._v2.content import Content
from awkward._v2.index import Index

np = ak.nplike.NumpyMetadata.instance()


class ListOffsetArray(Content):
    """Array of lists: element ``i`` is ``content[offsets[i]:offsets[i + 1]]``."""

    def __init__(self, offsets, content):
        """Validate and store the components of the array.

        Args:
            offsets: non-empty Index with dtype int32, uint32 or int64;
                consecutive pairs delimit each list in ``content``.
            content: Content holding the list items.

        Raises:
            TypeError: if an argument has the wrong type or dtype.
            ValueError: if ``offsets`` is empty.
        """
        # Explicit exceptions instead of assert: asserts vanish under -O.
        if not (
            isinstance(offsets, Index)
            and offsets._T in (np.int32, np.uint32, np.int64)
        ):
            raise TypeError(
                "{0} 'offsets' must be an Index with dtype in (int32, uint32, "
                "int64), not {1}".format(type(self).__name__, repr(offsets))
            )
        if not isinstance(content, Content):
            raise TypeError(
                "{0} 'content' must be a Content subtype, not {1}".format(
                    type(self).__name__, repr(content)
                )
            )
        # len(self) is len(offsets) - 1, so at least one offset is required.
        if not len(offsets) != 0:
            raise ValueError(
                "{0} len(offsets) ({1}) must be >= 1".format(
                    type(self).__name__, len(offsets)
                )
            )

        self.offsets = offsets
        self.content = content

    def __len__(self):
        # N lists are delimited by N + 1 offsets.
        return len(self.offsets) - 1

    def __repr__(self):
        return "ListOffsetArray(" + repr(self.offsets) + ", " + repr(self.content) + ")"

    def _getitem_at(self, where):
        """Return the list at integer position ``where`` as a slice of content.

        Negative positions count from the end.

        Raises:
            IndexError: if ``where`` is outside ``[-len(self), len(self))``.
        """
        if where < 0:
            where += len(self)
        if 0 > where or where >= len(self):
            raise IndexError("array index out of bounds")
        return self.content[self.offsets[where] : self.offsets[where + 1]]

    def _getitem_range(self, where):
        """Return the lists selected by slice ``where``, sharing the content.

        Only the start and stop of the slice are applied here; the step is
        not used.
        """
        start, stop, _ = where.indices(len(self))
        # One extra offset is needed to close the last selected list.
        offsets = self.offsets[start : stop + 1]
        if len(offsets) == 0:
            # start > stop selects nothing at all; a lone offset keeps the
            # result a valid zero-length array.
            offsets = [0]
        return ListOffsetArray(Index(offsets), self.content)

    def _getitem_field(self, where):
        """Project field ``where`` out of the content, keeping the same offsets."""
        return ListOffsetArray(self.offsets, self.content[where])

    def _getitem_fields(self, where):
        """Project the fields ``where`` out of the content, keeping the same offsets."""
        return ListOffsetArray(self.offsets, self.content[where])
Loading