Merge pull request #81 from KamitaniLab/feature-inversion-pipeline

Feature inversion pipeline for modular iCNN construction
KamitaniLab · Jul 29, 2024 · 3cd3c8d · 3cd3c8d · github-actions · Jul 29, 2024
2 parents 6075f53 + 02dd872
commit 3cd3c8d
Show file tree

Hide file tree

Showing 33 changed files with 3,475 additions and 9 deletions.
diff --git a/bdpy/dl/torch/dataset.py b/bdpy/dl/torch/dataset.py
@@ -0,0 +1,195 @@
+from __future__ import annotations
+
+from typing import Iterable, Callable, Dict
+
+from pathlib import Path
+
+from PIL import Image
+import numpy as np
+from torch.utils.data import Dataset
+
+from bdpy.dataform import DecodedFeatures, Features
+
+
+# Features of one sample: maps a layer name to that layer's feature array.
+_FeatureTypeNP = Dict[str, np.ndarray]
+
+
+def _removesuffix(s: str, suffix: str) -> str:
+    """Return a copy of `s` with a trailing `suffix` stripped.
+
+    Note
+    ----
+    Backport of `str.removesuffix`, which is available from Python 3.9. This
+    helper can be deleted once support for Python 3.8 is dropped.
+
+    Parameters
+    ----------
+    s : str
+        Input string.
+    suffix : str
+        Suffix to strip. An empty suffix leaves the string unchanged.
+
+    Returns
+    -------
+    str
+        `s` without `suffix` when `s` ends with it, otherwise a copy of `s`.
+    """
+    # Guard clause: nothing to strip for an empty or non-matching suffix.
+    if not suffix or not s.endswith(suffix):
+        return s[:]
+    return s[: len(s) - len(suffix)]
+
+
+class FeaturesDataset(Dataset):
+    """Dataset of features.
+
+    Each item is a dict that maps every requested layer path name to the
+    feature of one stimulus, read through `bdpy.dataform.Features`.
+
+    Parameters
+    ----------
+    root_path : str | Path
+        Path to the root directory of features.
+    layer_path_names : Iterable[str]
+        List of layer path names. Each layer path name is used to get features
+        from the root directory so that the layer path name must be a part of
+        the path to the layer.
+    stimulus_names : list[str], optional
+        List of stimulus names. If None, all stimulus names are used.
+    transform : callable, optional
+        Callable object which is used to transform features. The callable object
+        must take a dict of features and return a dict of features.
+    """
+
+    def __init__(
+        self,
+        root_path: str | Path,
+        layer_path_names: Iterable[str],
+        stimulus_names: list[str] | None = None,
+        transform: Callable[[_FeatureTypeNP], _FeatureTypeNP] | None = None,
+    ):
+        self._features_store = Features(Path(root_path).as_posix())
+        # Materialize the iterable: it is traversed again on every
+        # `__getitem__` call, so a one-shot iterator (e.g. a generator) would
+        # be exhausted after the first item.
+        self._layer_path_names = list(layer_path_names)
+        if stimulus_names is None:
+            # Fall back to every label known to the feature store.
+            stimulus_names = self._features_store.labels
+        self._stimulus_names = stimulus_names
+        self._transform = transform
+
+    def __len__(self) -> int:
+        """Return the number of stimuli in the dataset."""
+        return len(self._stimulus_names)
+
+    def __getitem__(self, index: int) -> _FeatureTypeNP:
+        """Return the features of the `index`-th stimulus, keyed by layer path name."""
+        stimulus_name = self._stimulus_names[index]
+        features = {}
+        for layer_path_name in self._layer_path_names:
+            feature = self._features_store.get(
+                layer=layer_path_name, label=stimulus_name
+            )
+            feature = feature[0]  # NOTE: remove batch axis
+            features[layer_path_name] = feature
+        if self._transform is not None:
+            features = self._transform(features)
+        return features
+
+
+class DecodedFeaturesDataset(Dataset):
+    """Dataset of decoded features.
+
+    Each item is a dict that maps every requested layer path name to the
+    decoded feature of one stimulus for a fixed subject and ROI, read through
+    `bdpy.dataform.DecodedFeatures`.
+
+    Parameters
+    ----------
+    root_path : str | Path
+        Path to the root directory of decoded features.
+    layer_path_names : Iterable[str]
+        List of layer path names. Each layer path name is used to get features
+        from the root directory so that the layer path name must be a part of
+        the path to the layer.
+    subject_id : str
+        ID of the subject.
+    roi : str
+        ROI name.
+    stimulus_names : list[str], optional
+        List of stimulus names. If None, all stimulus names are used.
+    transform : callable, optional
+        Callable object which is used to transform features. The callable object
+        must take a dict of features and return a dict of features.
+
+    Raises
+    ------
+    ValueError
+        If `stimulus_names` is None and the decoded feature store provides no
+        labels.
+    """
+
+    def __init__(
+        self,
+        root_path: str | Path,
+        layer_path_names: Iterable[str],
+        subject_id: str,
+        roi: str,
+        stimulus_names: list[str] | None = None,
+        transform: Callable[[_FeatureTypeNP], _FeatureTypeNP] | None = None,
+    ):
+        self._decoded_features_store = DecodedFeatures(Path(root_path).as_posix())
+        # Materialize the iterable: it is traversed again on every
+        # `__getitem__` call, so a one-shot iterator (e.g. a generator) would
+        # be exhausted after the first item.
+        self._layer_path_names = list(layer_path_names)
+        self._subject_id = subject_id
+        self._roi = roi
+        if stimulus_names is None:
+            stimulus_names = self._decoded_features_store.labels
+            # Explicit check instead of `assert`, which is stripped under `-O`.
+            if stimulus_names is None:
+                raise ValueError(
+                    "No stimulus names were given and the decoded feature "
+                    "store provides no labels."
+                )
+        self._stimulus_names = stimulus_names
+        self._transform = transform
+
+    def __len__(self) -> int:
+        """Return the number of stimuli in the dataset."""
+        return len(self._stimulus_names)
+
+    def __getitem__(self, index: int) -> _FeatureTypeNP:
+        """Return the decoded features of the `index`-th stimulus, keyed by layer path name."""
+        stimulus_name = self._stimulus_names[index]
+        decoded_features = {}
+        for layer_path_name in self._layer_path_names:
+            decoded_feature = self._decoded_features_store.get(
+                layer=layer_path_name,
+                label=stimulus_name,
+                subject=self._subject_id,
+                roi=self._roi,
+            )
+            decoded_feature = decoded_feature[0]  # NOTE: remove batch axis
+            decoded_features[layer_path_name] = decoded_feature
+        if self._transform is not None:
+            decoded_features = self._transform(decoded_features)
+        return decoded_features
+
+
+class ImageDataset(Dataset):
+    """Dataset of images.
+
+    Each item is a pair of the image, converted to RGB and divided by 255,
+    and its stimulus name.
+
+    Parameters
+    ----------
+    root_path : str | Path
+        Path to the root directory of images.
+    stimulus_names : list[str], optional
+        List of stimulus names. If None, the names of all files in `root_path`
+        ending with `.<extension>` are used, in sorted order.
+    extension : str, optional
+        Extension of the image files (without the leading dot).
+    """
+
+    def __init__(
+        self,
+        root_path: str | Path,
+        stimulus_names: list[str] | None = None,
+        extension: str = "jpg",
+    ):
+        self.root_path = root_path
+        if stimulus_names is None:
+            # Match on the full ".<extension>" suffix so that every globbed
+            # name can be stripped by `_removesuffix` and reopened in
+            # `__getitem__`. Sort because the directory listing order is not
+            # guaranteed to be stable.
+            suffix = "." + extension
+            stimulus_names = sorted(
+                _removesuffix(path.name, suffix)
+                for path in Path(root_path).glob(f"*{suffix}")
+            )
+        self._stimulus_names = stimulus_names
+        self._extension = extension
+
+    def __len__(self) -> int:
+        """Return the number of images in the dataset."""
+        return len(self._stimulus_names)
+
+    def __getitem__(self, index: int) -> tuple[np.ndarray, str]:
+        """Return the `index`-th image as an array together with its stimulus name."""
+        stimulus_name = self._stimulus_names[index]
+        image_path = Path(self.root_path) / f"{stimulus_name}.{self._extension}"
+        # `convert` returns a new image holding the pixel data, so the file
+        # opened by `Image.open` can be closed as soon as the block exits.
+        with Image.open(image_path) as image:
+            image = image.convert("RGB")
+        return np.array(image) / 255.0, stimulus_name
+
+
+class RenameFeatureKeys:
+    """Transform that renames the keys of a feature dict.
+
+    Parameters
+    ----------
+    mapping : dict[str, str]
+        Maps an original key to its new name. Keys that are not in the
+        mapping are kept unchanged.
+    """
+
+    def __init__(self, mapping: dict[str, str]):
+        self._mapping = mapping
+
+    def __call__(self, features: _FeatureTypeNP) -> _FeatureTypeNP:
+        """Return a new dict with the keys of `features` renamed."""
+        renamed = {}
+        for name, value in features.items():
+            # Fall back to the original name when no rename is registered.
+            renamed[self._mapping.get(name, name)] = value
+        return renamed
diff --git a/bdpy/dl/torch/domain/__init__.py b/bdpy/dl/torch/domain/__init__.py
@@ -0,0 +1 @@
+from .core import Domain, InternalDomain, IrreversibleDomain, ComposedDomain, KeyValueDomain
diff --git a/bdpy/dl/torch/domain/core.py b/bdpy/dl/torch/domain/core.py
@@ -0,0 +1,175 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Iterable, TypeVar, Generic
+import warnings
+
+import torch.nn as nn
+
+# Type of the data handled by a domain; `send` and `receive` take and return it.
+_T = TypeVar("_T")
+
+
+class Domain(nn.Module, ABC, Generic[_T]):
+    """Base class for stimulus domain.
+
+    A domain converts data between its own representation and the library's internal
+    common space.
+
+    Suppose we want to compose two functions `f: X -> Y_1` and `g: Y_2 -> Z`, where `X`,
+    `Y_1`, `Y_2`, and `Z` are different domains and `Y_1` and `Y_2` are similar domains
+    that can be converted to each other. The composition is then `g . t . f(x)`, where
+    `t: Y_1 -> Y_2` is the domain conversion function. This class is used to implement `t`.
+
+    Subclasses should implement the `send` and `receive` methods. `send` converts data
+    from the original domain (`Y_1` or `Y_2`) to the internal common space (`Y_0`), and
+    `receive` converts data from the internal common space back to the original domain.
+    Given a domain class for each of `Y_1` and `Y_2`, the conversion function is
+    `t = Y_2.receive . Y_1.send`.
+
+    Note that subclasses do not necessarily guarantee that `send` and `receive` are
+    inverses of each other. If the domain conversion is irreversible, the subclass
+    should inherit from `IrreversibleDomain` instead of this class.
+    """
+
+    @abstractmethod
+    def send(self, x: _T) -> _T:
+        """Convert data from this domain to the internal common space.
+
+        Parameters
+        ----------
+        x : _T
+            Data in the original domain.
+
+        Returns
+        -------
+        _T
+            Data in the internal common space.
+        """
+
+    @abstractmethod
+    def receive(self, x: _T) -> _T:
+        """Convert data from the internal common space to this domain.
+
+        Parameters
+        ----------
+        x : _T
+            Data in the internal common space.
+
+        Returns
+        -------
+        _T
+            Data in the original domain.
+        """
+
+
+class InternalDomain(Domain, Generic[_T]):
+    """The internal common space.
+
+    The domain class that represents the internal common space itself, so
+    both conversions are the identity.
+    """
+
+    def send(self, x: _T) -> _T:
+        """Return `x` unchanged."""
+        return x
+
+    def receive(self, x: _T) -> _T:
+        """Return `x` unchanged."""
+        return x
+
+
+class IrreversibleDomain(Domain, Generic[_T]):
+    """The domain which cannot be reversed.
+
+    This class is used to convert data between each domain and library's
+    internal common space. Subclasses of this class do not guarantee that
+    `send` and `receive` are inverses of each other; a `RuntimeWarning`
+    saying so is emitted whenever an instance is constructed.
+
+    The `send` and `receive` methods defined here return their input
+    unchanged.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        # Adjacent string literals inside the parentheses are concatenated
+        # into a single message.
+        message = (
+            f"{self.__class__.__name__} is an irreversible domain. "
+            "It does not guarantee the reversibility of `send` and `receive` "
+            "methods. Please use the combination of `send` and `receive` methods "
+            "with caution."
+        )
+        warnings.warn(message, RuntimeWarning)
+
+    def send(self, x: _T) -> _T:
+        """Return `x` unchanged."""
+        return x
+
+    def receive(self, x: _T) -> _T:
+        """Return `x` unchanged."""
+        return x
+
+
+class ComposedDomain(Domain, Generic[_T]):
+    """The domain composed of multiple sub-domains.
+
+    Suppose we have list of domain objects `domains = [d_0, d_1, ..., d_n]`.
+    Then, `ComposedDomain(domains)` accesses the data in the original domain `D`
+    as `d_n.receive . ... d_1.receive . d_0.receive(x)` from the internal common space `D_0`.
+    `send` applies the `send` methods of the sub-domains in the opposite order,
+    i.e. `d_0.send . d_1.send . ... d_n.send(x)`.
+
+    Parameters
+    ----------
+    domains : Iterable[Domain]
+        Sub-domains to compose.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import torch
+    >>> from bdpy.dl.torch.domain import ComposedDomain
+    >>> from bdpy.dl.torch.domain.image_domain import AffineDomain, BGRDomain
+    >>> composed_domain = ComposedDomain([
+    ...     AffineDomain(0.5, 1),
+    ...     BGRDomain(),
+    ... ])
+    >>> image = torch.randn(1, 3, 64, 64).clamp(-0.5, 0.5)
+    >>> image.shape
+    torch.Size([1, 3, 64, 64])
+    >>> composed_domain.send(image).shape
+    torch.Size([1, 3, 64, 64])
+    >>> print(composed_domain.send(image).min().item(), composed_domain.send(image).max().item())
+    0.0 1.0
+    """
+
+    def __init__(self, domains: Iterable[Domain]) -> None:
+        super().__init__()
+        self.domains = nn.ModuleList(domains)
+
+    def send(self, x: _T) -> _T:
+        """Apply `send` of every sub-domain, from the last one to the first."""
+        result = x
+        for sub_domain in reversed(self.domains):
+            result = sub_domain.send(result)
+        return result
+
+    def receive(self, x: _T) -> _T:
+        """Apply `receive` of every sub-domain, from the first one to the last."""
+        result = x
+        for sub_domain in self.domains:
+            result = sub_domain.receive(result)
+        return result
+
+
+class KeyValueDomain(Domain, Generic[_T]):
+    """The domain which converts key-value pairs.
+
+    This class is used to convert key-value pairs between each domain and library's
+    internal common space. Every value is converted by the domain registered
+    under the same key; a key without a registered domain raises `KeyError`.
+
+    Parameters
+    ----------
+    domain_mapper : dict[str, Domain]
+        Dictionary that maps keys to domains.
+    """
+
+    def __init__(self, domain_mapper: dict[str, Domain]) -> None:
+        super().__init__()
+        # NOTE(review): the mapper is kept as a plain dict, so the contained
+        # domains are presumably not registered as submodules of this module
+        # (relevant for e.g. `.to()` or `state_dict()`) -- confirm that this
+        # is intended.
+        self.domain_mapper = domain_mapper
+
+    def send(self, x: dict[str, _T]) -> dict[str, _T]:
+        """Send each value to the internal common space with the domain of its key."""
+        sent = {}
+        for key, value in x.items():
+            sent[key] = self.domain_mapper[key].send(value)
+        return sent
+
+    def receive(self, x: dict[str, _T]) -> dict[str, _T]:
+        """Receive each value from the internal common space with the domain of its key."""
+        received = {}
+        for key, value in x.items():
+            received[key] = self.domain_mapper[key].receive(value)
+        return received
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		from .core import Domain, InternalDomain, IrreversibleDomain, ComposedDomain, KeyValueDomain
File	Stmts	Miss	Cover	Missing
bdpy/bdata
bdata.py	400	320	20%	73–79, 86, 90, 95, 99, 104, 109, 113, 118, 122, 132–134, 155–172, 190, 206–207, 232–248, 252–262, 276–277, 293, 310, 314, 318–356, 388–477, 508, 539, 547–551, 560, 577–584, 589–598, 604–614, 618, 622–625, 628, 632–653, 656–665, 668–677, 683–691, 696–729, 735–744, 750–757, 761–767, 771–799, 803–824, 828–862, 866–868, 872–874, 878–887
featureselector.py	64	57	11%	43–47, 52–93, 98–124
metadata.py	67	48	28%	21–30, 34, 38, 42, 46, 50, 54, 74–115, 135–144, 149, 154
utils.py	113	103	9%	44–110, 127–173, 201, 217–253, 258, 263
bdpy/dataform
datastore.py	107	85	21%	59–75, 90–93, 97–98, 102–113, 116–119, 122–127, 131–132, 137–158, 190–197, 222–259, 262–265
features.py	298	233	22%	29–32, 41–47, 74–103, 107, 111, 115, 119, 137–163, 168–197, 213–238, 241–269, 272–275, 278–289, 305–319, 323, 327, 331, 335, 339, 343, 347, 351, 355, 359, 364–394, 398–418, 422–462, 465, 470–477, 491–493, 496–499, 502–505, 508–512, 515–516, 536–549
kvs.py	140	140	0%	4–272
pd.py	9	5	44%	25–27, 43–44
sparse.py	67	51	24%	19–29, 35–46, 52–58, 65–74, 78, 81–87, 90–93, 96–98, 101–126
utils.py	12	12	0%	3–18
bdpy/dataset
utils.py	45	45	0%	3–98
bdpy/distcomp
distcomp.py	92	78	15%	20–29, 32–49, 52–70, 73–93, 96–107, 111, 114–117, 121–127
bdpy/dl
caffe.py	60	60	0%	4–129
bdpy/dl/torch
base.py	43	24	44%	31–41, 48, 54, 60, 63, 73–83, 90, 96, 102, 105
dataset.py	74	74	0%	1–195
models.py	333	298	11%	28–84, 114–140, 148–169, 175–238, 249–253, 259–279, 288–292, 297–316, 327–331, 340–405, 427–431, 442–494, 515–517, 528–587, 611–614, 625–684, 708–711, 722–771, 790–793, 804–853, 872–875
torch.py	121	92	24%	43–60, 63, 80–95, 105–111, 116, 123, 126, 131, 138, 141, 188–225, 228, 231–243, 246–281
bdpy/dl/torch/domain
core.py	46	20	57%	47, 63, 74, 77, 89–90, 99, 102, 137–138, 141–143, 146–148, 164–165, 168, 173
feature_domain.py	24	13	46%	14, 18, 27–38, 41, 44
image_domain.py	64	37	42%	26, 31, 36, 41, 84–108, 111, 114, 121, 124, 137, 140–150, 179, 211–216, 223, 229
bdpy/evals
metrics.py	95	87	8%	12–34, 40–73, 82–112, 118–159, 172–179
bdpy/feature
feature.py	30	28	7%	33–74
bdpy/fig
__init__.py	4	4	0%	6–9
draw_group_image_set.py	88	88	0%	3–182
fig.py	88	88	0%	16–164
makeplots.py	336	336	0%	1–729
tile_images.py	59	59	0%	1–193
bdpy/ml
crossvalidation.py	59	54	8%	34–61, 104–128, 138, 164–196
ensemble.py	13	9	31%	33–46
learning.py	309	265	14%	43–44, 48, 52, 59, 91–104, 109–125, 128, 158–170, 184–209, 260–284, 290–433, 436–461, 465–504, 507–508, 524–536, 541–613
model.py	140	120	14%	29–39, 54–70, 86–144, 156–169, 184–222, 225, 230–250, 254–258, 271–285
regress.py	11	8	27%	29–38
searchlight.py	16	13	19%	32–51
bdpy/mri
fmriprep.py	497	452	9%	25–34, 38, 44–62, 65–75, 78–89, 92–160, 163–194, 230–360, 367–380, 384, 388–390, 394, 398–400, 410–434, 437–454, 457–464, 471–472, 475–491, 494, 498, 502–815, 819–831, 842–862, 866
glm.py	40	36	10%	46–95
image.py	24	19	21%	29–54
load_epi.py	28	18	36%	36–50, 56–63, 82–88
load_mri.py	19	16	16%	16–36
roi.py	248	234	6%	37–100, 122–148, 165–235, 241–314, 320–387, 399–466, 473–499
spm.py	158	139	12%	26–155, 162–166, 170, 174–179, 183–300
bdpy/opendata
__init__.py	1	1	0%	1
openneuro.py	210	210	0%	1–329
bdpy/pipeline
config.py	36	29	19%	15–64
bdpy/preproc
interface.py	52	44	15%	31–40, 60–69, 96–105, 111–123, 148–157, 208–217
preprocessor.py	129	107	17%	35, 43–65, 74–78, 85–97, 104–132, 138–189, 196–227, 234–239
select_top.py	23	18	22%	35–60
util.py	6	2	67%	14, 22
bdpy/recon
utils.py	55	55	0%	4–146
bdpy/recon/torch
icnn.py	161	161	0%	15–478
bdpy/recon/torch/modules
critic.py	44	20	55%	19, 36, 58, 64–65, 68, 71, 96–110, 132, 157, 184–185
encoder.py	29	11	62%	29, 44, 63, 66, 104–109, 124–125, 175
generator.py	72	36	50%	15–22, 28–38, 47, 52, 68, 83, 90, 93, 96, 122–124, 128, 143, 186–189, 193, 208, 247–248, 252, 256, 306–309
latent.py	34	13	62%	16, 21, 32, 42, 49, 52, 55, 80–83, 87, 97
bdpy/recon/torch/task
inversion.py	83	49	41%	19, 32, 37, 42, 47, 54, 59, 64, 69, 86–89, 92–95, 98, 101–102, 159–167, 185–214, 218–220
bdpy/stats
corr.py	43	38	12%	29–77, 96–112
bdpy/task
callback.py	71	44	38%	15–20, 53, 102–116, 154–156, 161, 166, 212–218, 233–247, 264–265
core.py	16	4	75%	41, 45, 50, 60
bdpy/util
info.py	47	36	23%	19–79
math.py	13	10	23%	23–38
utils.py	36	26	28%	48–66, 93–96, 116–121, 137–145
TOTAL	5644	4782	15%