
Add OS independent paths for Allen dataset #88

Merged
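This PR replaces string-formatted dataset paths (built via get_datapath(f"...") and taken apart again with os / glob string handling) with pathlib.Path objects joined from a module-level _DEFAULT_DATADIR = get_datapath(), so the paths resolve on any operating system. A minimal sketch of the pattern the hunks below apply, assuming get_datapath() returns the dataset root and using hypothetical example values:

import pathlib

from cebra.datasets import get_datapath

_DEFAULT_DATADIR = get_datapath()

# Before: OS-specific string formatting.
# matfile = get_datapath(
#     f"allen/visual_drift/data/calcium_excitatory/{area}/{exp_container}.mat")

# After: OS-independent joins with pathlib's "/" operator.
area, exp_container = "VISp", 12345  # hypothetical values
matfile = (pathlib.Path(_DEFAULT_DATADIR) / "allen" / "visual_drift" / "data" /
           "calcium_excitatory" / str(area) / f"{exp_container}.mat")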
39 changes: 21 additions & 18 deletions cebra/datasets/allen/ca_movie.py
@@ -21,7 +21,7 @@

import glob
import hashlib
import os
import pathlib

import h5py
import joblib
@@ -66,9 +66,9 @@ def __init__(
num_neurons=10,
seed=111,
area="VISp",
frame_feature_path=get_datapath(
"allen/features/allen_movies/vit_base/8/movie_one_image_stack.npz/testfeat.pth"
),
frame_feature_path=pathlib.Path(_DEFAULT_DATADIR) / "allen" /
"features" / "allen_movies" / "vit_base" / "8" /
"movie_one_image_stack.npz" / "testfeat.pth",
pca=False,
load=None,
):
@@ -116,16 +116,17 @@ def _get_pseudo_mice(self, area: str):
area: The visual cortical area to sample the neurons. Possible options: VISp, VISpm, VISam, VISal, VISl, VISrl.

"""

self.area = area
list_mice = glob.glob(
get_datapath(
f"allen/visual_drift/data/calcium_excitatory/{area}/*"))
exp_containers = [
int(mice.split(f"{area}/")[1].replace(".mat", ""))
for mice in list_mice
]
path = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(area)
list_mice = path.glob("*.mat")
exp_containers = [int(file.stem) for file in list_mice]

## Load summary file
summary = pd.read_csv(get_datapath("allen/data_summary.csv"))
summary = pd.read_csv(
pathlib.Path(_DEFAULT_DATADIR) / "allen" / "data_summary.csv")
## Filter excitatory neurons in V1
area_filtered = summary[(summary["exp"].isin(exp_containers)) &
(summary["target"] == area) &
@@ -169,9 +170,10 @@ def _convert_to_nums(string):
indices2.sort()
indices3.sort()
indices = [indices1, indices2, indices3]
matfile = get_datapath(
f"allen/visual_drift/data/calcium_excitatory/{area}/{exp_container}.mat"
)
matfile = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(
area) / f"{exp_container}.mat"
traces = scipy.io.loadmat(matfile)
for n, i in enumerate(seq_sessions):
session = traces["filtered_traces_days_events"][n, 0][
@@ -214,9 +216,10 @@ class AllenCaMoviePreLoadDataset(AllenCaMovieDataset):
"""

def __init__(self, num_neurons, seed):
preload = get_datapath(
f"allen_preload/allen-movie1-ca-{num_neurons}-{seed}.jl")
if not os.path.isfile(preload):
preload = pathlib.Path(
_DEFAULT_DATADIR
) / "allen_preload" / f"allen-movie1-ca-{num_neurons}-{seed}.jl"
if not preload.is_file():
print("The dataset is not yet preloaded.")
preload = None
super().__init__(num_neurons=num_neurons, seed=seed, load=preload)
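The final hunk above also swaps os.path.isfile for Path.is_file when checking for a preloaded cache. A short sketch of that check, assuming the same file-name scheme and the _DEFAULT_DATADIR root used throughout this PR:

import pathlib

from cebra.datasets import get_datapath

_DEFAULT_DATADIR = get_datapath()

num_neurons, seed = 10, 111  # hypothetical values
preload = (pathlib.Path(_DEFAULT_DATADIR) / "allen_preload" /
           f"allen-movie1-ca-{num_neurons}-{seed}.jl")
if not preload.is_file():  # pathlib replacement for os.path.isfile(preload)
    print("The dataset is not yet preloaded.")
    preload = None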
39 changes: 21 additions & 18 deletions cebra/datasets/allen/ca_movie_decoding.py
@@ -22,6 +22,7 @@
import glob
import hashlib
import os
import pathlib

import h5py
import joblib
@@ -41,6 +42,8 @@
from cebra.datasets.allen import SEEDS
from cebra.datasets.allen import SEEDS_DISJOINT

_DEFAULT_DATADIR = get_datapath()


@parametrize(
"allen-movie-{num_movie}-ca-{cortex}-{num_neurons}-{split_flag}-{test_repeat}-{seed}",
@@ -107,9 +110,9 @@ def _get_video_features(self, num_movie="one"):

"""

frame_feature_path = get_datapath(
f"allen/features/allen_movies/vit_base/8/movie_{num_movie}_image_stack.npz/testfeat.pth"
)
frame_feature_path = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "features" / "allen_movies" / "vit_base" / "8" / f"movie_{num_movie}_image_stack.npz" / "testfeat.pth"
frame_feature = torch.load(frame_feature_path)
return frame_feature

@@ -171,15 +174,14 @@ def _get_pseudo_mice(self, area, num_movie):

"""

list_mice = glob.glob(
get_datapath(
f"allen/visual_drift/data/calcium_excitatory/{area}/*"))
exp_containers = [
int(mice.split(f"{area}/")[1].replace(".mat", ""))
for mice in list_mice
]
path = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(area)
list_mice = path.glob("*.mat")
exp_containers = [int(file.stem) for file in list_mice]
## Load summary file
summary = pd.read_csv(get_datapath("allen/data_summary.csv"))
summary = pd.read_csv(
pathlib.Path(_DEFAULT_DATADIR) / "allen" / "data_summary.csv")
## Filter excitatory neurons in V1
area_filtered = summary[(summary["exp"].isin(exp_containers)) &
(summary["target"] == area) &
@@ -223,9 +225,10 @@ def _convert_to_nums(string):
indices2.sort()
indices3.sort()
indices = [indices1, indices2, indices3]
matfile = get_datapath(
f"allen/visual_drift/data/calcium_excitatory/{area}/{exp_container}.mat"
)
matfile = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(
area) / f"{exp_container}.mat"
traces = scipy.io.loadmat(matfile)
for n, i in enumerate(seq_sessions):
session = traces["filtered_traces_days_events"][n, 0][
@@ -325,10 +328,10 @@ def _get_pseudo_mice(self, area, num_movie):
area: The visual cortical area to sample the neurons. Possible options: VISp, VISpm, VISam, VISal, VISl, VISrl.

"""

list_mice = glob.glob(
get_datapath(
f"allen/visual_drift/data/calcium_excitatory/{area}/*"))
path = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "visual_drift" / "data" / "calcium_excitatory" / str(area)
list_mice = path.glob("*")

def _get_neural_data(num_movie, mat_file):
mat = scipy.io.loadmat(mat_file)
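Both _get_pseudo_mice hunks above drop glob.glob plus string splitting in favour of Path.glob and Path.stem to recover the experiment-container IDs. A minimal sketch of that idiom, under the directory layout assumed by the diff:

import pathlib

from cebra.datasets import get_datapath

_DEFAULT_DATADIR = get_datapath()

area = "VISp"  # hypothetical area
path = (pathlib.Path(_DEFAULT_DATADIR) / "allen" / "visual_drift" / "data" /
        "calcium_excitatory" / str(area))
# Path.glob yields pathlib.Path objects lazily; .stem strips the ".mat" suffix.
exp_containers = [int(file.stem) for file in path.glob("*.mat")]

Note that the second _get_pseudo_mice in this file globs "*" rather than "*.mat", so any non-.mat file in that directory would also be passed on to scipy.io.loadmat.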
1 change: 0 additions & 1 deletion cebra/datasets/allen/combined.py
@@ -23,7 +23,6 @@

import glob
import hashlib
import os

import h5py
import joblib
24 changes: 12 additions & 12 deletions cebra/datasets/allen/make_neuropixel.py
@@ -22,7 +22,7 @@

import argparse
import glob
import os
import pathlib

import h5py
import joblib as jl
@@ -32,6 +32,8 @@

from cebra.datasets import get_datapath

_DEFAULT_DATADIR = get_datapath()


def _filter_units(
unit_ids: npt.NDArray[np.int64],
@@ -153,13 +155,13 @@ def _spike_counts(bin_edges: npt.NDArray[np.float64], units: list):


def read_neuropixel(
path: str = "/shared/neuropixel/*/*.nwb",
path: str = pathlib.Path("/shared/neuropixel/"),
cortex: str = "VISp",
sampling_rate: float = 120.0,
):
"""Load 120Hz Neuropixels data recorded in the specified cortex during the movie1 stimulus.

The Neuropixels recordin is filtered and transformed to spike counts in a bin size specified by the sampling rat.
The Neuropixels recording is filtered and transformed to spike counts with a bin size specified by the sampling rate.

Args:
path: The directory where the Neuropixels .nwb files are located (searched with the "*/*.nwb" pattern).
@@ -168,7 +170,7 @@ def read_neuropixel(

"""

files = glob.glob(path)
files = path.glob("*/*.nwb")
sessions = {}
len_recording = []
session_frames = []
@@ -238,7 +240,8 @@ def read_neuropixel(
parser = argparse.ArgumentParser()
parser.add_argument("--data-path", default="/shared/neuropixel", type=str)
parser.add_argument("--save-path",
default=get_datapath("allen_movie1_neuropixel/VISp/"),
default=pathlib.Path(_DEFAULT_DATADIR) /
"allen_movie1_neuropixel" / "VISp",
type=str)
parser.add_argument("--sampling-rate", default=120, type=float)
parser.add_argument("--cortex", default="VISp", type=str)
@@ -255,17 +258,14 @@ def read_neuropixel(
"neural": sessions_dic,
"frames": session_frames
},
os.path.join(
args.save_path,
f"neuropixel_sessions_{int(args.sampling_rate)}_filtered.jl"),
pathlib.Path(args.save_path) /
f"neuropixel_sessions_{int(args.sampling_rate)}_filtered.jl",
)
jl.dump(
{
"neural": pseudo_mice,
"frames": pseudo_mice_frames
},
os.path.join(
args.save_path,
f"neuropixel_pseudomouse_{int(args.sampling_rate)}_filtered.jl",
),
pathlib.Path(args.save_path) /
f"neuropixel_pseudomouse_{int(args.sampling_rate)}_filtered.jl",
)
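The saving code above moves from os.path.join to pathlib's "/" operator. Because the hunks shown only add import pathlib (there is no from pathlib import Path in view), the sketch below spells the class as pathlib.Path; the save path is hypothetical:

import pathlib

import joblib as jl

save_path = "/tmp/allen_movie1_neuropixel/VISp"  # hypothetical --save-path value
sampling_rate = 120.0
sessions_file = (pathlib.Path(save_path) /
                 f"neuropixel_sessions_{int(sampling_rate)}_filtered.jl")
# joblib.dump accepts a str or pathlib.Path as the target filename, e.g.:
# jl.dump({"neural": sessions_dic, "frames": session_frames}, sessions_file)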
20 changes: 11 additions & 9 deletions cebra/datasets/allen/neuropixel_movie.py
@@ -19,6 +19,7 @@
import glob
import hashlib
import os
import pathlib

import h5py
import joblib
@@ -38,6 +39,8 @@
from cebra.datasets.allen import NUM_NEURONS
from cebra.datasets.allen import SEEDS

_DEFAULT_DATADIR = get_datapath()


@parametrize(
"allen-movie1-neuropixel-{num_neurons}-{seed}-10ms",
@@ -70,10 +73,10 @@ def _get_pseudo_mice(self, area="VISp"):

"""
self.area = area
list_recording = joblib.load(
get_datapath(
f"allen/allen_movie1_neuropixel/{area}/neuropixel_pseudomouse_120_filtered.jl"
))
path = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "allen_movie1_neuropixel" / area / "neuropixel_pseudomouse_120_filtered.jl"
list_recording = joblib.load(path)
pseudo_mice = list_recording["neural"]

return pseudo_mice.transpose(1, 0)
@@ -87,10 +90,9 @@ def _get_index(self, frame_feature):
frame_feature: The video frame feature.

"""

list_recording = joblib.load(
get_datapath(
f"allen/allen_movie1_neuropixel/{self.area}/neuropixel_pseudomouse_120_filtered.jl"
))
path = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "allen_movie1_neuropixel" / self.area / "neuropixel_pseudomouse_120_filtered.jl"
list_recording = joblib.load(path)
frames_index = list_recording["frames"]
return frame_feature[frames_index]
38 changes: 19 additions & 19 deletions cebra/datasets/allen/neuropixel_movie_decoding.py
@@ -19,6 +19,7 @@
import glob
import hashlib
import os
import pathlib

import h5py
import joblib
@@ -40,6 +41,8 @@
from cebra.datasets.allen import SEEDS
from cebra.datasets.allen import SEEDS_DISJOINT

_DEFAULT_DATADIR = get_datapath()


@parametrize(
"allen-movie-{num_movie}-neuropixel-{cortex}-{num_neurons}-{split_flag}-10-{seed}",
@@ -87,11 +90,10 @@ def _get_pseudo_mice(self, cortex: str, num_movie: str = "one"):
Args:
cortex: The visual cortical area.
"""

data = joblib.load(
get_datapath(
f"allen/allen_movie1_neuropixel/{cortex}/neuropixel_pseudomouse_120_filtered.jl"
))
path = pathlib.Path(
_DEFAULT_DATADIR
) / "allen" / "allen_movie1_neuropixel" / cortex / "neuropixel_pseudomouse_120_filtered.jl"
data = joblib.load(path)
return data

def _split(self, pseudo_mice, frame_feature):
@@ -148,25 +150,23 @@ class AllenNeuropixelMovie120HzCorticesDisjointDataset(

"""

def __init__(
self,
group,
num_neurons,
seed=111,
cortex="VISp",
split_flag="train",
frame_feature_path=get_datapath(
"allen/features/allen_movies/vit_base/8/movie_one_image_stack.npz/testfeat.pth"
),
):
def __init__(self,
group,
num_neurons,
seed=111,
cortex="VISp",
split_flag="train",
frame_feature_path=pathlib.Path(_DEFAULT_DATADIR) / "allen" /
"features" / "allen_movies" / "vit_base" / "8" /
"movie_one_image_stack.npz" / "testfeat.pth"):
self.split_flag = split_flag
self.seed = seed
self.group = group
self.num_neurons = num_neurons
data = joblib.load(
get_datapath(
f"allen/allen_movie1_neuropixel/{cortex}/neuropixel_pseudomouse_120_filtered.jl"
))
pathlib.Path(_DEFAULT_DATADIR) / "allen" /
"allen_movie1_neuropixel" / cortex /
"neuropixel_pseudomouse_120_filtered.jl")
pseudo_mice = data["neural"].T
self.neurons_indices = self._sample_neurons(pseudo_mice)
self.movie_len = pseudo_mice.shape[1]
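For completeness, a hypothetical direct construction of the disjoint dataset class shown above, overriding the new pathlib-based default for frame_feature_path. The argument values are illustrative only and assume the preprocessed Allen files exist under the data directory; in practice these classes are normally obtained through the dataset registry populated by @parametrize:

import pathlib

from cebra.datasets.allen.neuropixel_movie_decoding import (
    AllenNeuropixelMovie120HzCorticesDisjointDataset,
)

custom_features = pathlib.Path("/data/features/testfeat.pth")  # hypothetical path
dataset = AllenNeuropixelMovie120HzCorticesDisjointDataset(
    group=0,
    num_neurons=400,
    seed=111,
    cortex="VISp",
    split_flag="train",
    frame_feature_path=custom_features,
)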