Skip to content

Commit 15cd669

Browse files
author
Altay Sansal
committed
add block indexing capabilities
1 parent b248e4f commit 15cd669

File tree

4 files changed

+467
-12
lines changed

4 files changed

+467
-12
lines changed

docs/api/core.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ The Array class (``zarr.core``)
1010
.. automethod:: set_basic_selection
1111
.. automethod:: get_mask_selection
1212
.. automethod:: set_mask_selection
13+
.. automethod:: get_block_selection
14+
.. automethod:: set_block_selection
1315
.. automethod:: get_coordinate_selection
1416
.. automethod:: set_coordinate_selection
1517
.. automethod:: get_orthogonal_selection

zarr/core.py

Lines changed: 198 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
OIndex,
2323
OrthogonalIndexer,
2424
VIndex,
25+
BlockIndex,
26+
BlockIndexer,
2527
PartialChunkIterator,
2628
check_fields,
2729
check_no_multi_fields,
@@ -139,6 +141,7 @@ class Array:
139141
info
140142
vindex
141143
oindex
144+
blocks
142145
write_empty_chunks
143146
meta_array
144147
@@ -154,6 +157,8 @@ class Array:
154157
set_mask_selection
155158
get_coordinate_selection
156159
set_coordinate_selection
160+
get_block_selection
161+
set_block_selection
157162
digest
158163
hexdigest
159164
resize
@@ -230,6 +235,7 @@ def __init__(
230235
# initialize indexing helpers
231236
self._oindex = OIndex(self)
232237
self._vindex = VIndex(self)
238+
self._blocks = BlockIndex(self)
233239

234240
def _load_metadata(self):
235241
"""(Re)load metadata from store."""
@@ -577,6 +583,12 @@ def vindex(self):
577583
:func:`set_mask_selection` for documentation and examples."""
578584
return self._vindex
579585

586+
@property
587+
def blocks(self):
588+
"""Shortcut for blocked chunked indexing, see :func:`get_block_selection` and
589+
:func:`set_block_selection` for documentation and examples."""
590+
return self._blocks
591+
580592
@property
581593
def write_empty_chunks(self) -> bool:
582594
"""A Boolean, True if chunks composed of the array's fill value
@@ -814,7 +826,8 @@ def __getitem__(self, selection):
814826
--------
815827
get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
816828
get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection,
817-
set_orthogonal_selection, vindex, oindex, __setitem__
829+
set_orthogonal_selection, get_block_selection, set_block_selection,
830+
vindex, oindex, blocks, __setitem__
818831
819832
"""
820833
fields, pure_selection = pop_fields(selection)
@@ -933,7 +946,8 @@ def get_basic_selection(self, selection=Ellipsis, out=None, fields=None):
933946
--------
934947
set_basic_selection, get_mask_selection, set_mask_selection,
935948
get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection,
936-
set_orthogonal_selection, vindex, oindex, __getitem__, __setitem__
949+
set_orthogonal_selection, get_block_selection, set_block_selection,
950+
vindex, oindex, blocks, __getitem__, __setitem__
937951
938952
"""
939953

@@ -1089,7 +1103,8 @@ def get_orthogonal_selection(self, selection, out=None, fields=None):
10891103
--------
10901104
get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
10911105
get_coordinate_selection, set_coordinate_selection, set_orthogonal_selection,
1092-
vindex, oindex, __getitem__, __setitem__
1106+
get_block_selection, set_block_selection,
1107+
vindex, oindex, blocks, __getitem__, __setitem__
10931108
10941109
"""
10951110

@@ -1160,7 +1175,8 @@ def get_coordinate_selection(self, selection, out=None, fields=None):
11601175
--------
11611176
get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
11621177
get_orthogonal_selection, set_orthogonal_selection, set_coordinate_selection,
1163-
vindex, oindex, __getitem__, __setitem__
1178+
get_block_selection, set_block_selection,
1179+
vindex, oindex, blocks, __getitem__, __setitem__
11641180
11651181
"""
11661182

@@ -1185,6 +1201,89 @@ def get_coordinate_selection(self, selection, out=None, fields=None):
11851201

11861202
return out
11871203

1204+
def get_block_selection(self, selection, out=None, fields=None):
1205+
"""Retrieve a selection of individual chunk blocks, by providing the indices
1206+
(coordinates) for each chunk block.
1207+
1208+
Parameters
1209+
----------
1210+
selection : tuple
1211+
A block index (integer) or slice of block indices for each dimension of the array.
1212+
out : ndarray, optional
1213+
If given, load the selected data directly into this array.
1214+
fields : str or sequence of str, optional
1215+
For arrays with a structured dtype, one or more fields can be specified to
1216+
extract data for.
1217+
1218+
Returns
1219+
-------
1220+
out : ndarray
1221+
A NumPy array containing the data for the requested selection.
1222+
1223+
Examples
1224+
--------
1225+
Setup a 2-dimensional array::
1226+
1227+
>>> import zarr
1228+
>>> import numpy as np
1229+
>>> z = zarr.array(np.arange(100).reshape(10, 10), chunks=(3, 3))
1230+
1231+
Retrieve items by specifying their block coordinates::
1232+
1233+
>>> z.get_block_selection((1, slice(None)))
1234+
array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
1235+
[40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
1236+
[50, 51, 52, 53, 54, 55, 56, 57, 58, 59]])
1237+
1238+
Which is equivalent to::
1239+
>>> z[3:6, :]
1240+
array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
1241+
[40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
1242+
[50, 51, 52, 53, 54, 55, 56, 57, 58, 59]])
1243+
1244+
For convenience, the block selection functionality is also available via the
1245+
`blocks` property, e.g.::
1246+
1247+
>>> z.blocks[1]
1248+
array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
1249+
[40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
1250+
[50, 51, 52, 53, 54, 55, 56, 57, 58, 59]])
1251+
1252+
Notes
1253+
-----
1254+
Block indexing is a convenience indexing method to work on individual chunks
1255+
with chunk index slicing. It follows the same concept as Dask's `Array.blocks`
1256+
indexing.
1257+
1258+
Slices are supported, but only with a step size of one.
1259+
1260+
A block selection may span several dimensions when indexing a multidimensional array.
1261+
For example::
1262+
1263+
>>> z.blocks[0, 1:3]
1264+
array([[ 3, 4, 5, 6, 7, 8],
1265+
[13, 14, 15, 16, 17, 18],
1266+
[23, 24, 25, 26, 27, 28]])
1267+
1268+
See Also
1269+
--------
1270+
get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
1271+
get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection,
1272+
set_coordinate_selection, set_block_selection,
1273+
vindex, oindex, blocks, __getitem__, __setitem__
1274+
1275+
"""
1276+
if not self._cache_metadata:
1277+
self._load_metadata()
1278+
1279+
# check args
1280+
check_fields(fields, self._dtype)
1281+
1282+
# setup indexer
1283+
indexer = BlockIndexer(selection, self)
1284+
1285+
return self._get_selection(indexer=indexer, out=out, fields=fields)
1286+
11881287
def get_mask_selection(self, selection, out=None, fields=None):
11891288
"""Retrieve a selection of individual items, by providing a Boolean array of the
11901289
same shape as the array against which the selection is being made, where True
@@ -1238,8 +1337,8 @@ def get_mask_selection(self, selection, out=None, fields=None):
12381337
--------
12391338
get_basic_selection, set_basic_selection, set_mask_selection,
12401339
get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection,
1241-
set_coordinate_selection, vindex, oindex, __getitem__, __setitem__
1242-
1340+
set_coordinate_selection, get_block_selection, set_block_selection,
1341+
vindex, oindex, blocks, __getitem__, __setitem__
12431342
"""
12441343

12451344
# refresh metadata
@@ -1376,7 +1475,8 @@ def __setitem__(self, selection, value):
13761475
--------
13771476
get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
13781477
get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection,
1379-
set_orthogonal_selection, vindex, oindex, __getitem__
1478+
set_orthogonal_selection, get_block_selection, set_block_selection,
1479+
vindex, oindex, blocks, __getitem__
13801480
13811481
"""
13821482
fields, pure_selection = pop_fields(selection)
@@ -1464,7 +1564,8 @@ def set_basic_selection(self, selection, value, fields=None):
14641564
--------
14651565
get_basic_selection, get_mask_selection, set_mask_selection,
14661566
get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection,
1467-
set_orthogonal_selection, vindex, oindex, __getitem__, __setitem__
1567+
set_orthogonal_selection, get_block_selection, set_block_selection,
1568+
vindex, oindex, blocks, __getitem__, __setitem__
14681569
14691570
"""
14701571

@@ -1555,7 +1656,8 @@ def set_orthogonal_selection(self, selection, value, fields=None):
15551656
--------
15561657
get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
15571658
get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection,
1558-
vindex, oindex, __getitem__, __setitem__
1659+
get_block_selection, set_block_selection,
1660+
vindex, oindex, blocks, __getitem__, __setitem__
15591661
15601662
"""
15611663

@@ -1627,7 +1729,8 @@ def set_coordinate_selection(self, selection, value, fields=None):
16271729
--------
16281730
get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
16291731
get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection,
1630-
vindex, oindex, __getitem__, __setitem__
1732+
get_block_selection, set_block_selection,
1733+
vindex, oindex, blocks, __getitem__, __setitem__
16311734
16321735
"""
16331736

@@ -1654,6 +1757,89 @@ def set_coordinate_selection(self, selection, value, fields=None):
16541757

16551758
self._set_selection(indexer, value, fields=fields)
16561759

1760+
def set_block_selection(self, selection, value, fields=None):
1761+
"""Modify a selection of individual blocks, by providing the chunk indices
1762+
(coordinates) for each block to be modified.
1763+
1764+
Parameters
1765+
----------
1766+
selection : tuple
1767+
A block index (integer) or slice of block indices for each dimension of the array.
1768+
value : scalar or array-like
1769+
Value to be stored into the array.
1770+
fields : str or sequence of str, optional
1771+
For arrays with a structured dtype, one or more fields can be specified to set
1772+
data for.
1773+
1774+
Examples
1775+
--------
1776+
Setup a 2-dimensional array::
1777+
1778+
>>> import zarr
1779+
>>> import numpy as np
1780+
>>> z = zarr.zeros((6, 6), dtype=int, chunks=2)
1781+
1782+
Set data for a selection of blocks::
1783+
1784+
>>> z.set_block_selection((1, 0), 1)
1785+
>>> z[...]
1786+
array([[0, 0, 0, 0, 0, 0],
1787+
[0, 0, 0, 0, 0, 0],
1788+
[1, 1, 0, 0, 0, 0],
1789+
[1, 1, 0, 0, 0, 0],
1790+
[0, 0, 0, 0, 0, 0],
1791+
[0, 0, 0, 0, 0, 0]])
1792+
1793+
For convenience, this functionality is also available via the `blocks` property.
1794+
E.g.::
1795+
1796+
>>> z.blocks[2, 1] = 4
1797+
>>> z[...]
1798+
array([[0, 0, 0, 0, 0, 0],
1799+
[0, 0, 0, 0, 0, 0],
1800+
[1, 1, 0, 0, 0, 0],
1801+
[1, 1, 0, 0, 0, 0],
1802+
[0, 0, 4, 4, 0, 0],
1803+
[0, 0, 4, 4, 0, 0]])
1804+
1805+
>>> z.blocks[:, 2] = 7
1806+
>>> z[...]
1807+
array([[0, 0, 0, 0, 7, 7],
1808+
[0, 0, 0, 0, 7, 7],
1809+
[1, 1, 0, 0, 7, 7],
1810+
[1, 1, 0, 0, 7, 7],
1811+
[0, 0, 4, 4, 7, 7],
1812+
[0, 0, 4, 4, 7, 7]])
1813+
1814+
Notes
1815+
-----
1816+
Block indexing is a convenience indexing method to work on individual chunks
1817+
with chunk index slicing. It follows the same concept as Dask's `Array.blocks`
1818+
indexing.
1819+
1820+
Slices are supported, but only with a step size of one.
1821+
1822+
See Also
1823+
--------
1824+
get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
1825+
get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection,
1826+
set_coordinate_selection, get_block_selection,
1827+
vindex, oindex, blocks, __getitem__, __setitem__
1828+
1829+
"""
1830+
# guard conditions
1831+
if self._read_only:
1832+
raise ReadOnlyError()
1833+
1834+
# refresh metadata
1835+
if not self._cache_metadata:
1836+
self._load_metadata_nosync()
1837+
1838+
# setup indexer
1839+
indexer = BlockIndexer(selection, self)
1840+
1841+
self._set_selection(indexer, value, fields=fields)
1842+
16571843
def set_mask_selection(self, selection, value, fields=None):
16581844
"""Modify a selection of individual items, by providing a Boolean array of the
16591845
same shape as the array against which the selection is being made, where True
@@ -1712,7 +1898,8 @@ def set_mask_selection(self, selection, value, fields=None):
17121898
--------
17131899
get_basic_selection, set_basic_selection, get_mask_selection,
17141900
get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection,
1715-
set_coordinate_selection, vindex, oindex, __getitem__, __setitem__
1901+
set_coordinate_selection, get_block_selection, set_block_selection,
1902+
vindex, oindex, blocks, __getitem__, __setitem__
17161903
17171904
"""
17181905

0 commit comments

Comments
 (0)