Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH]: Move data downloading/handling to junifer-data package. #363

Open
wants to merge 31 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
761e271
feat: add data.utils.check_dataset() to get or install junifer-data d…
synchon Dec 11, 2024
89cbf96
update: use data.utils.check_dataset() in get_xfm()
synchon Dec 11, 2024
bc18eac
chore: update junifer-data clone path
synchon Dec 12, 2024
d68cfa2
feat: add data.utils.fetch_file_via_datalad() to get files via datalad
synchon Dec 12, 2024
9603715
update: use data.utils.fetch_file_via_datalad() in get_xfm()
synchon Dec 12, 2024
ba83a5f
update: use junifer-data in CoordinatesRegistry.load()
synchon Dec 12, 2024
ef48f00
update: use junifer-data in MaskRegistry.load()
synchon Dec 16, 2024
06e4db5
fix: update init and registration logic for CoordinatesRegistry
synchon Dec 16, 2024
09357f6
chore: update CoordinatesRegistry tests
synchon Dec 16, 2024
04aa4b3
fix: update init and registration logic for MaskRegistry
synchon Dec 16, 2024
94e6498
chore: update MaskRegistry tests
synchon Dec 16, 2024
be69790
chore: lint
synchon Dec 16, 2024
a81a794
chore: update commentary for MaskRegistry
synchon Dec 16, 2024
d596a2d
update: store Path instead of str for external entries in MaskRegistry
synchon Dec 16, 2024
9d339b3
update: use junifer-data in ParcellationRegistry.load(); fix: update …
synchon Dec 18, 2024
7a43e96
chore: update ParcellationRegistry tests
synchon Dec 18, 2024
f730984
chore: remove unnecessary Path conversion in MaskRegistry
synchon Dec 18, 2024
73f0f40
chore: improve log messages in template_spaces.py
synchon Dec 18, 2024
a666567
chore: remove unnecessary type check stop
synchon Dec 18, 2024
4cb0f49
chore: remove httpx from deps
synchon Dec 18, 2024
733110e
chore: add changelogs 418.{enh,misc}
synchon Jan 8, 2025
6950fcc
update: adjust log level in data.utils.fetch_file_via_datalad
synchon Jan 10, 2025
8bd6f22
chore: add junifer_data to deps
synchon Jan 21, 2025
43d5f3b
chore: update ConfigVal typehint
synchon Jan 21, 2025
bb2fe07
refactor: adapt junifer_data usage for helper and version
synchon Jan 21, 2025
6a85c99
refactor: use junifer_data to load coordinates
synchon Jan 21, 2025
abda09f
refactor: use junifer_data to load masks
synchon Jan 21, 2025
1a2a7af
refactor: use junifer_data to load parcellations
synchon Jan 21, 2025
26b63c2
chore: update tests for masks
synchon Jan 21, 2025
34c505e
chore: update tests for parcellations
synchon Jan 21, 2025
81ced3b
refactor: use junifer_data to load xfms
synchon Jan 21, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changes/newsfragments/418.enh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Adapt usage of ``junifer-data`` DataLad dataset to fetch parcellations, masks, coordinates and xfms by `Synchon Mandal`_
1 change: 1 addition & 0 deletions docs/changes/newsfragments/418.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Remove ``httpx`` as a dependency by `Synchon Mandal`_
2 changes: 0 additions & 2 deletions junifer/cli/tests/test_cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ def test_get_dependency_information_short() -> None:
"nilearn",
"sqlalchemy",
"ruamel.yaml",
"httpx",
"tqdm",
"templateflow",
"lapy",
Expand Down Expand Up @@ -73,7 +72,6 @@ def test_get_dependency_information_long() -> None:
"nilearn",
"sqlalchemy",
"ruamel.yaml",
"httpx",
"tqdm",
"templateflow",
"lapy",
Expand Down
227 changes: 123 additions & 104 deletions junifer/data/coordinates/_coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@

import numpy as np
import pandas as pd
from junifer_data import get
from numpy.typing import ArrayLike

from ...utils import logger, raise_error
from ...utils.singleton import Singleton
from ..pipeline_data_registry_base import BasePipelineDataRegistry
from ..utils import get_native_warper
from ..utils import JUNIFER_DATA_VERSION, get_dataset_path, get_native_warper
from ._ants_coordinates_warper import ANTsCoordinatesWarper
from ._fsl_coordinates_warper import FSLCoordinatesWarper

Expand All @@ -32,104 +33,104 @@ class CoordinatesRegistry(BasePipelineDataRegistry, metaclass=Singleton):

def __init__(self) -> None:
"""Initialize the class."""
super().__init__()
# Each entry in registry is a dictionary that must contain at least
# the following keys:
# * 'space': the coordinates' space (e.g., 'MNI')
# The built-in coordinates are files that are shipped with the package
# in the data/VOIs directory. The user can also register their own
# The built-in coordinates are files that are shipped with the
# junifer-data dataset. The user can also register their own
# coordinates, which will be stored as numpy arrays in the dictionary.
# Make built-in and external dictionaries for validation later
self._builtin = {}
self._external = {}

# Path to the metadata of the VOIs
_vois_meta_path = Path(__file__).parent / "VOIs" / "meta"

self._builtin = {
"CogAC": {
"path": _vois_meta_path / "CogAC_VOIs.txt",
"space": "MNI",
},
"CogAR": {
"path": _vois_meta_path / "CogAR_VOIs.txt",
"space": "MNI",
},
"DMNBuckner": {
"path": _vois_meta_path / "DMNBuckner_VOIs.txt",
"space": "MNI",
},
"eMDN": {
"path": _vois_meta_path / "eMDN_VOIs.txt",
"space": "MNI",
},
"Empathy": {
"path": _vois_meta_path / "Empathy_VOIs.txt",
"space": "MNI",
},
"eSAD": {
"path": _vois_meta_path / "eSAD_VOIs.txt",
"space": "MNI",
},
"extDMN": {
"path": _vois_meta_path / "extDMN_VOIs.txt",
"space": "MNI",
},
"Motor": {
"path": _vois_meta_path / "Motor_VOIs.txt",
"space": "MNI",
},
"MultiTask": {
"path": _vois_meta_path / "MultiTask_VOIs.txt",
"space": "MNI",
},
"PhysioStress": {
"path": _vois_meta_path / "PhysioStress_VOIs.txt",
"space": "MNI",
},
"Rew": {
"path": _vois_meta_path / "Rew_VOIs.txt",
"space": "MNI",
},
"Somatosensory": {
"path": _vois_meta_path / "Somatosensory_VOIs.txt",
"space": "MNI",
},
"ToM": {
"path": _vois_meta_path / "ToM_VOIs.txt",
"space": "MNI",
},
"VigAtt": {
"path": _vois_meta_path / "VigAtt_VOIs.txt",
"space": "MNI",
},
"WM": {
"path": _vois_meta_path / "WM_VOIs.txt",
"space": "MNI",
},
"Power": {
"path": _vois_meta_path / "Power2011_MNI_VOIs.txt",
"space": "MNI",
},
"Power2011": {
"path": _vois_meta_path / "Power2011_MNI_VOIs.txt",
"space": "MNI",
},
"Dosenbach": {
"path": _vois_meta_path / "Dosenbach2010_MNI_VOIs.txt",
"space": "MNI",
},
"Power2013": {
"path": _vois_meta_path / "Power2013_MNI_VOIs.tsv",
"space": "MNI",
},
"AutobiographicalMemory": {
"path": _vois_meta_path / "AutobiographicalMemory_VOIs.txt",
"space": "MNI",
},
}

# Set built-in to registry
self._registry = self._builtin
self._builtin.update(
{
"CogAC": {
"file_path_suffix": "CogAC_VOIs.txt",
"space": "MNI",
},
"CogAR": {
"file_path_suffix": "CogAR_VOIs.txt",
"space": "MNI",
},
"DMNBuckner": {
"file_path_suffix": "DMNBuckner_VOIs.txt",
"space": "MNI",
},
"eMDN": {
"file_path_suffix": "eMDN_VOIs.txt",
"space": "MNI",
},
"Empathy": {
"file_path_suffix": "Empathy_VOIs.txt",
"space": "MNI",
},
"eSAD": {
"file_path_suffix": "eSAD_VOIs.txt",
"space": "MNI",
},
"extDMN": {
"file_path_suffix": "extDMN_VOIs.txt",
"space": "MNI",
},
"Motor": {
"file_path_suffix": "Motor_VOIs.txt",
"space": "MNI",
},
"MultiTask": {
"file_path_suffix": "MultiTask_VOIs.txt",
"space": "MNI",
},
"PhysioStress": {
"file_path_suffix": "PhysioStress_VOIs.txt",
"space": "MNI",
},
"Rew": {
"file_path_suffix": "Rew_VOIs.txt",
"space": "MNI",
},
"Somatosensory": {
"file_path_suffix": "Somatosensory_VOIs.txt",
"space": "MNI",
},
"ToM": {
"file_path_suffix": "ToM_VOIs.txt",
"space": "MNI",
},
"VigAtt": {
"file_path_suffix": "VigAtt_VOIs.txt",
"space": "MNI",
},
"WM": {
"file_path_suffix": "WM_VOIs.txt",
"space": "MNI",
},
"Power": {
"file_path_suffix": "Power2011_MNI_VOIs.txt",
"space": "MNI",
},
"Power2011": {
"file_path_suffix": "Power2011_MNI_VOIs.txt",
"space": "MNI",
},
"Dosenbach": {
"file_path_suffix": "Dosenbach2010_MNI_VOIs.txt",
"space": "MNI",
},
"Power2013": {
"file_path_suffix": "Power2013_MNI_VOIs.tsv",
"space": "MNI",
},
"AutobiographicalMemory": {
"file_path_suffix": "AutobiographicalMemory_VOIs.txt",
"space": "MNI",
},
}
)

# Update registry with built-in ones
self._registry.update(self._builtin)

def register(
self,
Expand Down Expand Up @@ -161,9 +162,9 @@ def register(
Raises
------
ValueError
If the coordinates ``name`` is already registered and
If the coordinates ``name`` is a built-in coordinates or
if the coordinates ``name`` is already registered and
``overwrite=False`` or
if the coordinates ``name`` is a built-in coordinates or
if the ``coordinates`` is not a 2D array or
if coordinate value does not have 3 components or
if the ``voi_names`` shape does not match the
Expand All @@ -174,19 +175,20 @@ def register(
"""
# Check for attempt of overwriting built-in coordinates
if name in self._builtin:
if isinstance(self._registry[name].get("path"), Path):
raise_error(
f"Coordinates: {name} already registered as built-in "
"coordinates."
)
raise_error(
f"Coordinates: {name} already registered as built-in "
"coordinates."
)
# Check for attempt of overwriting external coordinates
if name in self._external:
if overwrite:
logger.info(f"Overwriting coordinates: {name}")
else:
raise_error(
f"Coordinates: {name} already registered. "
"Set `overwrite=True` to update its value."
)

# Further checks
if not isinstance(coordinates, np.ndarray):
raise_error(
"Coordinates must be a `numpy.ndarray`, "
Expand All @@ -207,6 +209,7 @@ def register(
f"Length of `voi_names` ({len(voi_names)}) does not match the "
f"number of `coordinates` ({coordinates.shape[0]})."
)
# Registration
logger.info(f"Registering coordinates: {name}")
# Add coordinates info
self._external[name] = {
Expand Down Expand Up @@ -257,6 +260,8 @@ def load(self, name: str) -> tuple[ArrayLike, list[str], str]:
------
ValueError
If ``name`` is invalid.
RuntimeError
If there is a problem fetching the coordinates file.

"""
# Check for valid coordinates name
Expand All @@ -265,17 +270,31 @@ def load(self, name: str) -> tuple[ArrayLike, list[str], str]:
f"Coordinates: {name} not found. "
f"Valid options are: {self.list}"
)
# Load coordinates
# Load coordinates info
t_coord = self._registry[name]
# Load data
if isinstance(t_coord.get("path"), Path):
logger.debug(f"Loading coordinates {t_coord['path'].absolute()!s}")

# Load data for built-in ones
if t_coord.get("file_path_suffix") is not None:
# Set file path to retrieve
coords_file_path = Path(
f"coordinates/{name}/{t_coord['file_path_suffix']}"
)
logger.debug(f"Loading coordinates: `{name}`")
# Load via pandas
df_coords = pd.read_csv(t_coord["path"], sep="\t", header=None)
df_coords = pd.read_csv(
get(
file_path=coords_file_path,
dataset_path=get_dataset_path(),
tag=JUNIFER_DATA_VERSION,
),
sep="\t",
header=None,
)
# Convert dataframe to numpy ndarray
coords = df_coords.iloc[:, [0, 1, 2]].to_numpy()
# Get label names
names = list(df_coords.iloc[:, [3]].values[:, 0])
# Load data for external ones
else:
coords = t_coord["coords"]
names = t_coord["voi_names"]
Expand Down
3 changes: 1 addition & 2 deletions junifer/data/coordinates/tests/test_coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ def test_register_built_in_check() -> None:
coordinates=np.zeros(2),
voi_names=["1", "2"],
space="MNI",
overwrite=True,
)


Expand All @@ -32,14 +31,14 @@ def test_register_overwrite() -> None:
coordinates=np.zeros((2, 3)),
voi_names=["roi1", "roi2"],
space="MNI",
overwrite=True,
)
with pytest.raises(ValueError, match=r"already registered"):
CoordinatesRegistry().register(
name="MyList",
coordinates=np.ones((2, 3)),
voi_names=["roi2", "roi3"],
space="MNI",
overwrite=False,
)

CoordinatesRegistry().register(
Expand Down
Loading
Loading