Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

unpin xarray, numpy, pandas, netcdf4 #25

Merged
merged 14 commits into from
Sep 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@ matrix:
include:
- name: 3.7 public
python: '3.7'
- name: 3.7 private
python: '3.7'
- name: 3.8 public
python: '3.8'
- name: 3.9 public
python: '3.9'
- name: 3.9 private
python: '3.9'
env:
- PRIVATE_ACCESS=1
- secure: "CzOQmNMkHXavXihZWYL+G5sbdYq8KLrBWnorZEPhvsKDIKy1hhORCc+pAMXg+bjrPRXfRqZnX0XRRCoZbD9Mo9VvA1hIsV7i5bBbjMoyBTUn3vED0CQNBCgjaA2rLsHlJMtYdLoCOOAiaU+rTu2xxf0grjgKARzLpVNENmPgP0YqiXPEc7rdY3cifalCBpHTQgvu7Z6FR1yAdRsMfskTIwPa/GlTCNF8ZR+efuobQJrtApfzBgiH7+NJI5Aq6u8PWD6LqONCm2ut0NKL7BMNRMgwS3pjERr2spRWrLiCz05Y4icaUmhajPjCl3kMIjuHdw1OgvwQHuSW9hcgt0AXZoIC8qJqg5V39LrsYYPd5/sg7vcTZ+VRhWF5zDBMvTO0PFt36tpj9xnr2ATIPlp1ACXwi+fGPkPAJp3ZIHbl36lji6sB4WLwIISongseizqTAHKowmpCGqEL6TZB65/MThWBeccRNB1N4a3wG34Eu7n1XXqecK1c+68JO98fOQxwmQ/utOkQRcVQzmGyARUk7WyupoqMmAZbWxOJ5AzyXPiK2OGXmiVJSwlMQKtF7eqkLs8wWeQD+zQj2qoSqF45LdFQsww19W2wC0wHuTV6nDBaKB59lY5qFufDWT+Gh06jLk8UpgYANh9f3fH5ZgUKfnH7I17StuDEpxCZ1kxVKcA="
Expand Down
50 changes: 47 additions & 3 deletions brainio/assemblies.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,8 +328,8 @@ def array_is_element(arr, element):
return len(arr) == 1 and arr[0] == element


def get_metadata(assembly, dims=None, names_only=False, include_coords=True,
include_indexes=True, include_multi_indexes=False, include_levels=True):
def get_metadata_before_2022_06(assembly, dims=None, names_only=False, include_coords=True,
include_indexes=True, include_multi_indexes=False, include_levels=True):
"""
Return coords and/or indexes or index levels from an assembly, yielding either `name` or `(name, dims, values)`.
"""
Expand Down Expand Up @@ -362,6 +362,50 @@ def what(name, dims, values, names_only):
yield what(name, values.dims, values.values, names_only)


def get_metadata_after_2022_06(assembly, dims=None, names_only=False, include_coords=True,
                               include_indexes=True, include_multi_indexes=False, include_levels=True):
    """
    Return coords and/or indexes or index levels from an assembly, yielding either
    `name` (when `names_only`) or `(name, dims, values)` tuples.

    Implementation for xarray >= 2022.06, where a MultiIndex and each of its
    levels all appear in `assembly.indexes` (so classification is done via
    `index.names` rather than variable-level attributes).

    :param assembly: the xarray object to read metadata from
    :param dims: restrict to coords attached to these dims; a `None` entry keeps
        dimensionless coords; defaults to all dims plus dimensionless coords
    :param names_only: yield only names instead of `(name, dims, values)` tuples
    :param include_coords: include plain (non-index) coords
    :param include_indexes: include single (non-multi) indexes
    :param include_multi_indexes: include MultiIndexes themselves
    :param include_levels: include levels of MultiIndexes
    """
    def what(name, dims, values, names_only):
        # Shape one yielded item according to `names_only`.
        if names_only:
            return name
        else:
            return name, dims, values

    if dims is None:
        dims = assembly.dims + (None,)  # all dims plus dimensionless coords
    for name, values in assembly.coords.items():
        none_but_keep = (not values.dims) and None in dims  # dimensionless coord, explicitly requested
        shared = not (set(values.dims).isdisjoint(set(dims)))  # coord touches at least one requested dim
        if none_but_keep or shared:
            if name in assembly.indexes:  # it's an index
                index = assembly.indexes[name]
                if len(index.names) > 1:  # it's a MultiIndex or level
                    if name in index.names:  # it's a level
                        if include_levels:
                            yield what(name, values.dims, values.values, names_only)
                    else:  # it's a MultiIndex
                        if include_multi_indexes:
                            yield what(name, values.dims, values.values, names_only)
                else:  # it's a single Index
                    if include_indexes:
                        yield what(name, values.dims, values.values, names_only)
            else:  # it's a coord
                if include_coords:
                    yield what(name, values.dims, values.values, names_only)


def get_metadata(assembly, dims=None, names_only=False, include_coords=True,
                 include_indexes=True, include_multi_indexes=False, include_levels=True):
    """
    Return coords and/or indexes or index levels from an assembly, yielding either
    `name` (when `names_only`) or `(name, dims, values)` tuples.

    Dispatches to the implementation matching the installed xarray version:
    xarray >= 2022.06 accepts `create_index` in `stack`, older versions raise
    TypeError on that keyword.
    """
    try:
        # Version probe only — it must complete before delegation so that a
        # TypeError raised from inside the metadata iteration itself is not
        # mistaken for an old-xarray signal (which would silently restart the
        # iteration with the legacy implementation and yield duplicates).
        xr.DataArray().stack(create_index=True)
        implementation = get_metadata_after_2022_06
    except TypeError:
        implementation = get_metadata_before_2022_06
    yield from implementation(assembly, dims, names_only, include_coords,
                              include_indexes, include_multi_indexes, include_levels)


def coords_for_dim(assembly, dim):
result = OrderedDict()
meta = get_metadata(assembly, dims=(dim,), include_indexes=False, include_levels=False)
Expand Down Expand Up @@ -415,7 +459,7 @@ def correct_stimulus_id_name(cls, assembly):
names = list(get_metadata(assembly, dims=('presentation',), names_only=True))
if 'image_id' in names and 'stimulus_id' not in names:
assembly = assembly.assign_coords(
stimulus_id=('presentation', assembly['image_id']),
stimulus_id=('presentation', assembly['image_id'].data),
)
return assembly

Expand Down
6 changes: 3 additions & 3 deletions brainio/packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,9 @@ def write_netcdf(assembly, target_netcdf_file, append=False, group=None, compres
mode = "a" if append else "w"
target_netcdf_file.parent.mkdir(parents=True, exist_ok=True)
if compress:
ds = assembly.to_dataset(name="data")
compression = dict(zlib=True, complevel=1)
encoding = {var: compression for var in ds.variables}
ds = assembly.to_dataset(name="data", promote_attrs=True)
compression = dict(zlib=True, complevel=9)
encoding = {var: compression for var in ds.data_vars}
ds.to_netcdf(target_netcdf_file, mode=mode, group=group, encoding=encoding)
else:
assembly.to_netcdf(target_netcdf_file, mode=mode, group=group)
Expand Down
10 changes: 5 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@
"tqdm",
"Pillow",
"entrypoints",
"numpy>=1.16.5, !=1.21.*",
"pandas>=1.2.0, !=1.3.0",
"xarray==0.17.0",
"netcdf4==1.5.8",
"numpy",
"pandas",
"xarray!=2022.06.0",  # 2022.06.0 has a bug which breaks BrainIO: https://github.com/pydata/xarray/issues/6836
"netcdf4!=1.6.0",  # https://github.com/Unidata/netcdf4-python/issues/1175
]

setup(
name='brainio',
version='0.1.0',
version='0.2.0',
description="Data management for quantitative comparison of brains and brain-inspired systems",
long_description=readme,
author="Jon Prescott-Roy, Martin Schrimpf",
Expand Down
39 changes: 19 additions & 20 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,8 @@ def make_proto_assembly():


def scattered_floats(lo, hi, num):
    """Return `num` floats drawn uniformly from [lo, hi).

    Deterministic across runs: a fixed seed keeps test fixtures reproducible
    while still looking stochastic.
    """
    rng = np.random.default_rng(12345)  # fixed seed -> reproducible fixtures
    return rng.random(num) * (hi - lo) + lo


# taken from values in /braintree/data2/active/users/sachis/projects/oasis900/monkeys/oleo/mworksproc/oleo_oasis900_210216_113846_mwk.csv
Expand Down Expand Up @@ -90,23 +87,25 @@ def make_meta_assembly():
return a


def make_spk_assembly():
def make_spk_assembly(magnitude=3):
size = 10**magnitude
half = int((10**magnitude) / 2)
coords = {
"neuroid_id": ("event", ["A-019", "D-009"]*500),
"project": ("event", ["test"]*1000),
"datetime": ("event", np.repeat(np.datetime64('2021-02-16T11:41:55.000000000'), 1000)),
"animal": ("event", ["testo"]*1000),
"hemisphere": ("event", ["L", "R"]*500),
"region": ("event", ["V4", "IT"]*500),
"subregion": ("event", ["V4", "aIT"]*500),
"array": ("event", ["6250-002416", "4865-233455"]*500),
"bank": ("event", ["A", "D"]*500),
"electrode": ("event", ["019", "009"]*500),
"column": ("event", [5, 2]*500),
"row": ("event", [4, 8]*500),
"label": ("event", ["elec46", "elec123"]*500),
"neuroid_id": ("event", ["A-019", "D-009"]*half),
"project": ("event", ["test"]*size),
"datetime": ("event", np.repeat(np.datetime64('2021-02-16T11:41:55.000000000'), size)),
"animal": ("event", ["testo"]*size),
"hemisphere": ("event", ["L", "R"]*half),
"region": ("event", ["V4", "IT"]*half),
"subregion": ("event", ["V4", "aIT"]*half),
"array": ("event", ["6250-002416", "4865-233455"]*half),
"bank": ("event", ["A", "D"]*half),
"electrode": ("event", ["019", "009"]*half),
"column": ("event", [5, 2]*half),
"row": ("event", [4, 8]*half),
"label": ("event", ["elec46", "elec123"]*half),
}
data = sorted(scattered_floats(67.7, 21116.2, 1000))
data = sorted(scattered_floats(67.7, 21116.2, size))
a = SpikeTimesAssembly(
data=data,
coords=coords,
Expand Down
134 changes: 108 additions & 26 deletions tests/test_assemblies.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,48 @@
SpikeTimesAssembly, get_metadata


def test_get_metadata():
    """Exercise the full include_* flag matrix of `get_metadata`.

    Positional arguments after `assy` below are, in order:
    dims, names_only, include_coords, include_indexes, include_multi_indexes, include_levels.
    """
    # NOTE(review): removed a stray `xr.show_versions()` debug call that dumped
    # the full environment report on every test run.
    assy = DataAssembly(
        data=[[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]],
        coords={
            'up': ("a", ['alpha', 'alpha', 'beta', 'beta', 'beta', 'beta']),
            'down': ("a", [1, 1, 1, 1, 2, 2]),
            'why': ("a", ['yes', 'yes', 'yes', 'yes', 'yes', 'no']),
            'b': ('b', ['x', 'y', 'z']),
        },
        dims=['a', 'b']
    )
    # demote 'why' from index level to plain coord so all four kinds are present
    assy = assy.reset_index('why')
    assert set(get_metadata(assy, None, True, True, True, True, True)) == {'a', 'up', 'down', 'why', 'b'}
    assert set(get_metadata(assy, None, True, True, True, True, False)) == {'a', 'why', 'b'}
    assert set(get_metadata(assy, None, True, True, True, False, True)) == {'up', 'down', 'why', 'b'}
    assert set(get_metadata(assy, None, True, True, True, False, False)) == {'why', 'b'}
    assert set(get_metadata(assy, None, True, True, False, True, True)) == {'a', 'up', 'down', 'why'}
    assert set(get_metadata(assy, None, True, True, False, True, False)) == {'a', 'why'}
    assert set(get_metadata(assy, None, True, True, False, False, True)) == {'up', 'down', 'why'}
    assert set(get_metadata(assy, None, True, True, False, False, False)) == {'why'}
    assert set(get_metadata(assy, None, True, False, True, True, True)) == {'a', 'up', 'down', 'b'}
    assert set(get_metadata(assy, None, True, False, True, True, False)) == {'a', 'b'}
    assert set(get_metadata(assy, None, True, False, True, False, True)) == {'up', 'down', 'b'}
    assert set(get_metadata(assy, None, True, False, True, False, False)) == {'b'}
    assert set(get_metadata(assy, None, True, False, False, True, True)) == {'a', 'up', 'down'}
    assert set(get_metadata(assy, None, True, False, False, True, False)) == {'a'}
    assert set(get_metadata(assy, None, True, False, False, False, True)) == {'up', 'down'}
    assert set(get_metadata(assy, None, True, False, False, False, False)) == set()

    # counts below assume the conftest proto assembly's metadata layout — confirm against conftest
    a = make_proto_assembly()
    md_all = list(get_metadata(a))
    assert len(md_all) == 4
    md_coo = list(get_metadata(a, include_indexes=False, include_levels=False))
    assert len(md_coo) == 0
    md_ind = list(get_metadata(a, include_coords=False, include_indexes=True, include_multi_indexes=True, include_levels=False))
    assert len(md_ind) == 2
    md_lev = list(get_metadata(a, include_coords=False, include_indexes=False))
    assert len(md_lev) == 4


def test_get_levels():
assy = DataAssembly(
data=[[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]],
Expand All @@ -40,12 +82,11 @@ def test_wrap_dataarray(self):
dims=['a', 'b']
)
assert "up" in da.coords
assert da["a"].variable.level_names is None
assert "a" not in da.indexes
da = gather_indexes(da)
assert da.coords.variables["a"].level_names == ["up", "down"]
assert da["a"].variable.level_names == ["up", "down"]
assert da.indexes["a"].names == ["up", "down"]
da = DataArray(da)
assert da.coords.variables["a"].level_names == ["up", "down"]
assert da.indexes["a"].names == ["up", "down"]
assert da["up"] is not None

def test_wrap_dataassembly(self):
Expand All @@ -58,11 +99,9 @@ def test_wrap_dataassembly(self):
},
dims=['a', 'b']
)
assert assy.coords.variables["a"].level_names == ["up", "down"]
assert assy["a"].variable.level_names == ["up", "down"]
assert assy.indexes["a"].names == ["up", "down"]
da = DataArray(assy)
assert da.coords.variables["a"].level_names == ["up", "down"]
assert da["a"].variable.level_names == ["up", "down"]
assert assy.indexes["a"].names == ["up", "down"]
assert da["up"] is not None

def test_reset_index(self):
Expand Down Expand Up @@ -109,6 +148,7 @@ def test_getitem(self):
)
single = assy[0, 0]
assert type(single) is type(assy)
assert single == 1

def test_is_fastpath(self):
"""In DataAssembly.__init__ we have to check whether fastpath is present in a set of arguments and true
Expand Down Expand Up @@ -164,17 +204,14 @@ def test_align(self):
dims=['a', 'b']
)
assert hasattr(da1, "up")
assert da1.coords.variables["a"].level_names == ["up", "down"]
assert da1["a"].variable.level_names == ["up", "down"]
assert da1.indexes["a"].names == ["up", "down"]
assert da1["up"] is not None
aligned1, aligned2 = xr.align(da1, da2, join="outer")
assert hasattr(aligned1, "up")
assert aligned1.coords.variables["a"].level_names == ["up", "down"]
assert aligned1["a"].variable.level_names == ["up", "down"]
assert aligned1.indexes["a"].names == ["up", "down"]
assert aligned1["up"] is not None
assert hasattr(aligned2, "up")
assert aligned2.coords.variables["a"].level_names == ["up", "down"]
assert aligned2["a"].variable.level_names == ["up", "down"]
assert aligned2.indexes["a"].names == ["up", "down"]
assert aligned2["up"] is not None


Expand Down Expand Up @@ -202,6 +239,60 @@ def test_incorrect_coord(self):
d.sel(coordB=0)


class TestPlainGroupy:
    """Plain (non-multi) groupby over a MultiIndex level, on DataArray and DataAssembly.

    Regression tests for the xarray==2022.06.0 bug where grouping by a
    MultiIndex level fails: https://github.com/pydata/xarray/issues/6836
    """

    def test_on_data_array(self):
        d = DataArray(
            data=[
                [0, 1, 2, 3, 4, 5, 6],
                [7, 8, 9, 10, 11, 12, 13],
                [14, 15, 16, 17, 18, 19, 20]
            ],
            coords={
                "greek": ("a", ['alpha', 'beta', 'gamma']),
                "colors": ("a", ['red', 'green', 'blue']),
                "compass": ("b", ['north', 'south', 'east', 'west', 'northeast', 'southeast', 'southwest']),
                "integer": ("b", [0, 1, 2, 3, 4, 5, 6]),
            },
            dims=("a", "b")
        )
        d = gather_indexes(d)
        g = d.groupby('greek')
        # with xarray==2022.06.0, the following line fails with:
        # ValueError: conflicting multi-index level name 'greek' with dimension 'greek'
        m = g.mean(...)
        c = DataArray(
            data=[3, 10, 17],
            coords={'greek': ('greek', ['alpha', 'beta', 'gamma'])},
            dims=['greek']
        )
        assert m.equals(c)

    def test_on_data_assembly(self):
        # same data as above, but DataAssembly gathers its indexes itself
        d = DataAssembly(
            data=[
                [0, 1, 2, 3, 4, 5, 6],
                [7, 8, 9, 10, 11, 12, 13],
                [14, 15, 16, 17, 18, 19, 20]
            ],
            coords={
                "greek": ("a", ['alpha', 'beta', 'gamma']),
                "colors": ("a", ['red', 'green', 'blue']),
                "compass": ("b", ['north', 'south', 'east', 'west', 'northeast', 'southeast', 'southwest']),
                "integer": ("b", [0, 1, 2, 3, 4, 5, 6]),
            },
            dims=("a", "b")
        )
        g = d.groupby('greek')
        m = g.mean(...)
        c = DataAssembly(
            data=[3, 10, 17],
            coords={'greek': ('greek', ['alpha', 'beta', 'gamma'])},
            dims=['greek']
        )
        assert m.equals(c)


class TestMultiGroupby:
def test_single_dimension(self):
d = DataAssembly([[1, 2, 3], [4, 5, 6]], coords={'a': ['a', 'b'], 'b': ['x', 'y', 'z']}, dims=['a', 'b'])
Expand All @@ -228,13 +319,14 @@ def test_single_coord(self):
},
dims=("a", "b")
)
g = d.multi_groupby(['greek']).mean(...)
g = d.multi_groupby(['greek'])
m = g.mean(...)
c = DataAssembly(
data=[3, 10, 17],
coords={'greek': ('greek', ['alpha', 'beta', 'gamma'])},
dims=['greek']
)
assert g.equals(c)
assert m.equals(c)

def test_single_dim_multi_coord(self):
d = DataAssembly([1, 2, 3, 4, 5, 6],
Expand Down Expand Up @@ -452,15 +544,5 @@ def test_load_extras(self, test_stimulus_set_identifier):
assert extra.shape == (40,)


def test_get_metadata():
    """Smoke-test `get_metadata` include_* filters on the prototype assembly.

    Counts assume the conftest proto assembly's metadata consists of two
    MultiIndexes with four levels in total and no plain coords — confirm
    against `make_proto_assembly` in conftest.
    """
    a = make_proto_assembly()
    # default flags: single indexes + levels (MultiIndexes themselves excluded)
    md_all = list(get_metadata(a))
    assert len(md_all) == 4
    # plain coords only
    md_coo = list(get_metadata(a, include_indexes=False, include_levels=False))
    assert len(md_coo) == 0
    # indexes (including MultiIndexes) without their levels
    md_ind = list(get_metadata(a, include_coords=False, include_indexes=True, include_multi_indexes=True, include_levels=False))
    assert len(md_ind) == 2
    # levels only
    md_lev = list(get_metadata(a, include_coords=False, include_indexes=False))
    assert len(md_lev) == 4


4 changes: 2 additions & 2 deletions tests/test_packaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,9 @@ def test_package_extras(test_stimulus_set_identifier, test_catalog_identifier, b


def test_compression(test_write_netcdf_path):
    """Compressed netCDF output must be smaller than uncompressed for the same assembly."""
    # magnitude=6 -> 10**6 events: large enough that zlib compression clearly wins
    write_netcdf(make_spk_assembly(6), test_write_netcdf_path, compress=False)
    uncompressed = test_write_netcdf_path.stat().st_size
    write_netcdf(make_spk_assembly(6), test_write_netcdf_path, compress=True)
    compressed = test_write_netcdf_path.stat().st_size
    assert uncompressed > compressed

Expand Down