Add OS independent paths for Allen dataset (#88)
* fix path in ca_movie_decoding.py

* fix paths to support windows

* remove print statement

* Add example implementation using pathlib

* add pathlib paths to allen dataset

* run allen tests

* remove os import from allen dataset
gonlairo authored Oct 3, 2023
1 parent 3f06b71 commit 94fa87a
Showing 9 changed files with 122 additions and 117 deletions.
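The change repeated across these files is the same: paths built from get_datapath(f"...") strings together with os and glob are replaced by pathlib objects rooted at a module-level _DEFAULT_DATADIR = get_datapath(). A minimal sketch of that pattern as it appears in the hunks below; the data directory here is an illustrative stand-in, not the repository's actual get_datapath() return value:

import pathlib

# Illustrative stand-in; in the diff each module defines
# _DEFAULT_DATADIR = get_datapath() once at import time.
_DEFAULT_DATADIR = pathlib.Path("~/cebra_data").expanduser()


def list_experiment_containers(area: str = "VISp") -> list:
    """Collect experiment-container IDs from .mat filenames, OS-independently."""
    path = (_DEFAULT_DATADIR / "allen" / "visual_drift" / "data" /
            "calcium_excitatory" / area)
    # Path.glob replaces glob.glob(get_datapath(f".../{area}/*")), and
    # Path.stem replaces the split(f"{area}/")[1].replace(".mat", "") idiom.
    return [int(file.stem) for file in path.glob("*.mat")]


def matfile_path(area: str, exp_container: int) -> pathlib.Path:
    # The "/" operator inserts the correct separator on every platform,
    # which is what makes these paths safe on Windows.
    return (_DEFAULT_DATADIR / "allen" / "visual_drift" / "data" /
            "calcium_excitatory" / area / f"{exp_container}.mat")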
39 changes: 21 additions & 18 deletions cebra/datasets/allen/ca_movie.py
@@ -21,7 +21,7 @@

import glob
import hashlib
import os
import pathlib

import h5py
import joblib
@@ -66,9 +66,9 @@ def __init__(
num_neurons=10,
seed=111,
area="VISp",
frame_feature_path=get_datapath(
"allen/features/allen_movies/vit_base/8/movie_one_image_stack.npz/testfeat.pth"
),
frame_feature_path=pathlib.Path(_DEFAULT_DATADIR) / "allen" /
"features" / "allen_movies" / "vit_base" / "8" /
"movie_one_image_stack.npz" / "testfeat.pth",
pca=False,
load=None,
):
@@ -116,16 +116,17 @@ def _get_pseudo_mice(self, area: str):
area: The visual cortical area from which to sample neurons. Possible options: VISp, VISpm, VISam, VISal, VISl, VISrl.
"""

self.area = area
list_mice = glob.glob(
get_datapath(
f"allen/visual_drift/data/calcium_excitatory/{area}/*"))
exp_containers = [
int(mice.split(f"{area}/")[1].replace(".mat", ""))
for mice in list_mice
]
path = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(area)
list_mice = path.glob("*.mat")
exp_containers = [int(file.stem) for file in list_mice]

## Load summary file
summary = pd.read_csv(get_datapath("allen/data_summary.csv"))
summary = pd.read_csv(
pathlib.Path(_DEFAULT_DATADIR) / "allen" / "data_summary.csv")
## Filter excitatory neurons in V1
area_filtered = summary[(summary["exp"].isin(exp_containers)) &
(summary["target"] == area) &
@@ -169,9 +170,10 @@ def _convert_to_nums(string):
indices2.sort()
indices3.sort()
indices = [indices1, indices2, indices3]
matfile = get_datapath(
f"allen/visual_drift/data/calcium_excitatory/{area}/{exp_container}.mat"
)
matfile = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(
area) / f"{exp_container}.mat"
traces = scipy.io.loadmat(matfile)
for n, i in enumerate(seq_sessions):
session = traces["filtered_traces_days_events"][n, 0][
@@ -214,9 +216,10 @@ class AllenCaMoviePreLoadDataset(AllenCaMovieDataset):
"""

def __init__(self, num_neurons, seed):
preload = get_datapath(
f"allen_preload/allen-movie1-ca-{num_neurons}-{seed}.jl")
if not os.path.isfile(preload):
preload = pathlib.Path(
_DEFAULT_DATADIR
) / "allen_preload" / f"allen-movie1-ca-{num_neurons}-{seed}.jl"
if not preload.is_file():
print("The dataset is not yet preloaded.")
preload = None
super().__init__(num_neurons=num_neurons, seed=seed, load=preload)
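Two substitutions in this file are worth spelling out: the old container-ID extraction, mice.split(f"{area}/")[1].replace(".mat", ""), assumes forward-slash separators and breaks on Windows paths, whereas Path.stem is separator-agnostic; likewise os.path.isfile(preload) becomes preload.is_file(). A small illustrative check (the example path and container ID are hypothetical):

import pathlib

p = pathlib.PureWindowsPath(
    r"C:\data\allen\visual_drift\data\calcium_excitatory\VISp\511510670.mat")
# The old idiom, str(p).split("VISp/")[1], raises IndexError here because the
# Windows path uses "\" rather than "/"; the stem is unaffected by separators.
assert p.stem == "511510670"
assert int(p.stem) == 511510670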
39 changes: 21 additions & 18 deletions cebra/datasets/allen/ca_movie_decoding.py
@@ -22,6 +22,7 @@
import glob
import hashlib
import os
import pathlib

import h5py
import joblib
@@ -41,6 +42,8 @@
from cebra.datasets.allen import SEEDS
from cebra.datasets.allen import SEEDS_DISJOINT

_DEFAULT_DATADIR = get_datapath()


@parametrize(
"allen-movie-{num_movie}-ca-{cortex}-{num_neurons}-{split_flag}-{test_repeat}-{seed}",
@@ -107,9 +110,9 @@ def _get_video_features(self, num_movie="one"):
"""

frame_feature_path = get_datapath(
f"allen/features/allen_movies/vit_base/8/movie_{num_movie}_image_stack.npz/testfeat.pth"
)
frame_feature_path = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "features" / "allen_movies" / "vit_base" / "8" / f"movie_{num_movie}_image_stack.npz" / "testfeat.pth"
frame_feature = torch.load(frame_feature_path)
return frame_feature

@@ -171,15 +174,14 @@ def _get_pseudo_mice(self, area, num_movie):
"""

list_mice = glob.glob(
get_datapath(
f"allen/visual_drift/data/calcium_excitatory/{area}/*"))
exp_containers = [
int(mice.split(f"{area}/")[1].replace(".mat", ""))
for mice in list_mice
]
path = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(area)
list_mice = path.glob("*.mat")
exp_containers = [int(file.stem) for file in list_mice]
## Load summary file
summary = pd.read_csv(get_datapath("allen/data_summary.csv"))
summary = pd.read_csv(
pathlib.Path(_DEFAULT_DATADIR) / "allen" / "data_summary.csv")
## Filter excitatory neurons in V1
area_filtered = summary[(summary["exp"].isin(exp_containers)) &
(summary["target"] == area) &
@@ -223,9 +225,10 @@ def _convert_to_nums(string):
indices2.sort()
indices3.sort()
indices = [indices1, indices2, indices3]
matfile = get_datapath(
f"allen/visual_drift/data/calcium_excitatory/{area}/{exp_container}.mat"
)
matfile = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(
area) / f"{exp_container}.mat"
traces = scipy.io.loadmat(matfile)
for n, i in enumerate(seq_sessions):
session = traces["filtered_traces_days_events"][n, 0][
@@ -325,10 +328,10 @@ def _get_pseudo_mice(self, area, num_movie):
area: The visual cortical area from which to sample neurons. Possible options: VISp, VISpm, VISam, VISal, VISl, VISrl.
"""

list_mice = glob.glob(
get_datapath(
f"allen/visual_drift/data/calcium_excitatory/{area}/*"))
path = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(area)
list_mice = path.glob("*")

def _get_neural_data(num_movie, mat_file):
mat = scipy.io.loadmat(mat_file)
1 change: 0 additions & 1 deletion cebra/datasets/allen/combined.py
@@ -23,7 +23,6 @@

import glob
import hashlib
import os

import h5py
import joblib
24 changes: 12 additions & 12 deletions cebra/datasets/allen/make_neuropixel.py
@@ -22,7 +22,7 @@

import argparse
import glob
import os
import pathlib

import h5py
import joblib as jl
@@ -32,6 +32,8 @@

from cebra.datasets import get_datapath

_DEFAULT_DATADIR = get_datapath()


def _filter_units(
unit_ids: npt.NDArray[np.int64],
@@ -153,13 +155,13 @@ def _spike_counts(bin_edges: npt.NDArray[np.float64], units: list):


def read_neuropixel(
path: str = "/shared/neuropixel/*/*.nwb",
path: pathlib.Path = pathlib.Path("/shared/neuropixel/"),
cortex: str = "VISp",
sampling_rate: float = 120.0,
):
"""Load 120Hz Neuropixels data recorded in the specified cortex during the movie1 stimulus.
The Neuropixels recordin is filtered and transformed to spike counts in a bin size specified by the sampling rat.
The Neuropixels recording is filtered and transformed to spike counts in a bin size specified by the sampling rate.
Args:
path: The directory under which the Neuropixels .nwb files are located (searched with the */*.nwb pattern).
@@ -168,7 +170,7 @@ def read_neuropixel(
"""

files = glob.glob(path)
files = path.glob("*/*.nwb")
sessions = {}
len_recording = []
session_frames = []
@@ -238,7 +240,8 @@ def read_neuropixel(
parser = argparse.ArgumentParser()
parser.add_argument("--data-path", default="/shared/neuropixel", type=str)
parser.add_argument("--save-path",
default=get_datapath("allen_movie1_neuropixel/VISp/"),
default=pathlib.Path(_DEFAULT_DATADIR) /
"allen_movie1_neuropixel" / "VISp",
type=str)
parser.add_argument("--sampling-rate", default=120, type=float)
parser.add_argument("--cortex", default="VISp", type=str)
@@ -255,17 +258,14 @@ def read_neuropixel(
"neural": sessions_dic,
"frames": session_frames
},
os.path.join(
args.save_path,
f"neuropixel_sessions_{int(args.sampling_rate)}_filtered.jl"),
pathlib.Path(args.save_path) /
f"neuropixel_sessions_{int(args.sampling_rate)}_filtered.jl",
)
jl.dump(
{
"neural": pseudo_mice,
"frames": pseudo_mice_frames
},
os.path.join(
args.save_path,
f"neuropixel_pseudomouse_{int(args.sampling_rate)}_filtered.jl",
),
pathlib.Path(args.save_path) /
f"neuropixel_pseudomouse_{int(args.sampling_rate)}_filtered.jl",
)
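In make_neuropixel.py the same idea carries over to the output files: os.path.join(args.save_path, ...) becomes a pathlib join, with the argparse string wrapped in a Path before the "/" operator appends the filename. A short sketch, under the assumption that --save-path arrives as a plain string (as type=str implies):

import pathlib

save_path = "allen_movie1_neuropixel/VISp"  # argparse with type=str delivers a str
sampling_rate = 120.0

# Wrapping the string once lets "/" append the filename portably, replacing
# os.path.join(args.save_path, f"neuropixel_sessions_..._filtered.jl").
out = pathlib.Path(save_path) / f"neuropixel_sessions_{int(sampling_rate)}_filtered.jl"
print(out)  # allen_movie1_neuropixel/VISp/neuropixel_sessions_120_filtered.jl on POSIX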
20 changes: 11 additions & 9 deletions cebra/datasets/allen/neuropixel_movie.py
@@ -19,6 +19,7 @@
import glob
import hashlib
import os
import pathlib

import h5py
import joblib
@@ -38,6 +39,8 @@
from cebra.datasets.allen import NUM_NEURONS
from cebra.datasets.allen import SEEDS

_DEFAULT_DATADIR = get_datapath()


@parametrize(
"allen-movie1-neuropixel-{num_neurons}-{seed}-10ms",
@@ -70,10 +73,10 @@ def _get_pseudo_mice(self, area="VISp"):
"""
self.area = area
list_recording = joblib.load(
get_datapath(
f"allen/allen_movie1_neuropixel/{area}/neuropixel_pseudomouse_120_filtered.jl"
))
path = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "allen_movie1_neuropixel" / area / "neuropixel_pseudomouse_120_filtered.jl"
list_recording = joblib.load(path)
pseudo_mice = list_recording["neural"]

return pseudo_mice.transpose(1, 0)
@@ -87,10 +90,9 @@ def _get_index(self, frame_feature):
frame_feature: The video frame feature.
"""

list_recording = joblib.load(
get_datapath(
f"allen/allen_movie1_neuropixel/{self.area}/neuropixel_pseudomouse_120_filtered.jl"
))
path = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "allen_movie1_neuropixel" / self.area / "neuropixel_pseudomouse_120_filtered.jl"
list_recording = joblib.load(path)
frames_index = list_recording["frames"]
return frame_feature[frames_index]
38 changes: 19 additions & 19 deletions cebra/datasets/allen/neuropixel_movie_decoding.py
@@ -19,6 +19,7 @@
import glob
import hashlib
import os
import pathlib

import h5py
import joblib
@@ -40,6 +41,8 @@
from cebra.datasets.allen import SEEDS
from cebra.datasets.allen import SEEDS_DISJOINT

_DEFAULT_DATADIR = get_datapath()


@parametrize(
"allen-movie-{num_movie}-neuropixel-{cortex}-{num_neurons}-{split_flag}-10-{seed}",
@@ -87,11 +90,10 @@ def _get_pseudo_mice(self, cortex: str, num_movie: str = "one"):
Args:
cortex: The visual cortical area.
"""

data = joblib.load(
get_datapath(
f"allen/allen_movie1_neuropixel/{cortex}/neuropixel_pseudomouse_120_filtered.jl"
))
path = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "allen_movie1_neuropixel" / cortex / "neuropixel_pseudomouse_120_filtered.jl"
data = joblib.load(path)
return data

def _split(self, pseudo_mice, frame_feature):
@@ -148,25 +150,23 @@ class AllenNeuropixelMovie120HzCorticesDisjointDataset(
"""

def __init__(
self,
group,
num_neurons,
seed=111,
cortex="VISp",
split_flag="train",
frame_feature_path=get_datapath(
"allen/features/allen_movies/vit_base/8/movie_one_image_stack.npz/testfeat.pth"
),
):
def __init__(self,
group,
num_neurons,
seed=111,
cortex="VISp",
split_flag="train",
frame_feature_path=pathlib.Path(_DEFAULT_DATADIR) / "allen" /
"features" / "allen_movies" / "vit_base" / "8" /
"movie_one_image_stack.npz" / "testfeat.pth"):
self.split_flag = split_flag
self.seed = seed
self.group = group
self.num_neurons = num_neurons
data = joblib.load(
get_datapath(
f"allen/allen_movie1_neuropixel/{cortex}/neuropixel_pseudomouse_120_filtered.jl"
))
pathlib.Path(_DEFAULT_DATADIR) / "allen" /
"allen_movie1_neuropixel" / cortex /
"neuropixel_pseudomouse_120_filtered.jl")
pseudo_mice = data["neural"].T
self.neurons_indices = self._sample_neurons(pseudo_mice)
self.movie_len = pseudo_mice.shape[1]