monai/data/__init__.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -58,6 +58,7 @@ @@
         pickle_hashing,
         rectify_header_sform_qform,
         rep_scalar_to_batch,
+        resample_datalist,
         select_cross_validation_folds,
         set_rnd,
         sorted_dict,
@@ Expand Down @@

monai/data/utils.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -66,6 +66,7 @@ @@
         "is_supported_format",
         "partition_dataset",
         "partition_dataset_classes",
+        "resample_datalist",
         "select_cross_validation_folds",
         "json_hashing",
         "pickle_hashing",
@@ Expand Down Expand Up / @@ -991,6 +992,31 @@ def partition_dataset_classes( @@
         return datasets
+    def resample_datalist(data: Sequence, factor: float, random_pick: bool = False, seed: int = 0):
+        """
+        Utility function to resample the loaded datalist for training, for example:
+        If factor < 1.0, randomly pick part of the datalist and set to Dataset, useful to quickly test the program.
+        If factor > 1.0, repeat the datalist to enhance the Dataset.
+        Args:
+            data: original datalist to scale.
+            factor: scale factor for the datalist, for example, factor=4.5, repeat the datalist 4 times and plus
+            50% of the original datalist.
+            random_pick: whether to randomly pick data if scale factor has decimal part.
+            seed: random seed to randomly pick data.
+        """
+        scale, repeats = math.modf(factor)
+        ret: List = list()
+        for _ in range(int(repeats)):
+            ret.extend(list(deepcopy(data)))
+        if scale > 1e-6:
+            ret.extend(partition_dataset(data=data, ratios=[scale, 1 - scale], shuffle=random_pick, seed=seed)[0])
+        return ret
     def select_cross_validation_folds(partitions: Sequence[Iterable], folds: Union[Sequence[int], int]) -> List:
         """
         Select cross validation data based on data partitions and specified fold index.
@@ Expand Down @@

tests/test_inverse.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -450,7 +450,7 @@ def test_inverse_inferred_seg(self, extra_transform): @@
             batch_size = 10
             # num workers = 0 for mac
-            num_workers = 2 if sys.platform != "darwin" else 0
+            num_workers = 2 if sys.platform == "linux" else 0
             transforms = Compose([AddChanneld(KEYS), SpatialPadd(KEYS, (150, 153)), extra_transform])
             num_invertible_transforms = sum(1 for i in transforms.transforms if isinstance(i, InvertibleTransform))
@@ Expand Down @@

tests/test_resample_datalist.py

-Original file line number
+Diff line change
@@ -0,0 +1,40 @@
+    # Copyright 2020 - 2021 MONAI Consortium
+    # Licensed under the Apache License, Version 2.0 (the "License");
+    # you may not use this file except in compliance with the License.
+    # You may obtain a copy of the License at
+    #     http://www.apache.org/licenses/LICENSE-2.0
+    # Unless required by applicable law or agreed to in writing, software
+    # distributed under the License is distributed on an "AS IS" BASIS,
+    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    # See the License for the specific language governing permissions and
+    # limitations under the License.
+    import unittest
+    import numpy as np
+    from parameterized import parameterized
+    from monai.data import resample_datalist
+    TEST_CASE_1 = [
+        {"data": [1, 2, 3, 4, 5], "factor": 2.5, "random_pick": True, "seed": 123},
+        [1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 2, 4, 5],
+    ]
+    TEST_CASE_2 = [
+        {"data": [1, 2, 3, 4, 5], "factor": 2.5, "random_pick": False, "seed": 0},
+        [1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3],
+    ]
+    TEST_CASE_3 = [{"data": [1, 2, 3, 4, 5], "factor": 0.6, "random_pick": True, "seed": 123}, [2, 4, 5]]
+    class TestResampleDatalist(unittest.TestCase):
+        @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
+        def test_value_shape(self, input_param, expected):
+            result = resample_datalist(**input_param)
+            np.testing.assert_allclose(result, expected)
+    if __name__ == "__main__":
+        unittest.main()

3174 Add support to scale datalist #3175

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

wyli merged 9 commits into Project-MONAI:dev from Nic-Ma:3174-scale-datalist

Oct 25, 2021

-Original file line number
+Diff line change
@@ Expand Up / @@ -58,6 +58,7 @@ @@
         pickle_hashing,
         rectify_header_sform_qform,
         rep_scalar_to_batch,
+        resample_datalist,
         select_cross_validation_folds,
         set_rnd,
         sorted_dict,
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -66,6 +66,7 @@ @@
         "is_supported_format",
         "partition_dataset",
         "partition_dataset_classes",
+        "resample_datalist",
         "select_cross_validation_folds",
         "json_hashing",
         "pickle_hashing",
@@ Expand Down Expand Up / @@ -991,6 +992,31 @@ def partition_dataset_classes( @@
         return datasets
+    def resample_datalist(data: Sequence, factor: float, random_pick: bool = False, seed: int = 0):
+        """
+        Utility function to resample the loaded datalist for training, for example:
+        If factor < 1.0, randomly pick part of the datalist and set to Dataset, useful to quickly test the program.
+        If factor > 1.0, repeat the datalist to enhance the Dataset.
+        Args:
+            data: original datalist to scale.
+            factor: scale factor for the datalist, for example, factor=4.5, repeat the datalist 4 times and plus
+            50% of the original datalist.
+            random_pick: whether to randomly pick data if scale factor has decimal part.
+            seed: random seed to randomly pick data.
+        """
+        scale, repeats = math.modf(factor)
+        ret: List = list()
+        for _ in range(int(repeats)):
+            ret.extend(list(deepcopy(data)))
+        if scale > 1e-6:
+            ret.extend(partition_dataset(data=data, ratios=[scale, 1 - scale], shuffle=random_pick, seed=seed)[0])
+        return ret
     def select_cross_validation_folds(partitions: Sequence[Iterable], folds: Union[Sequence[int], int]) -> List:
         """
         Select cross validation data based on data partitions and specified fold index.
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -450,7 +450,7 @@ def test_inverse_inferred_seg(self, extra_transform): @@
             batch_size = 10
             # num workers = 0 for mac
-            num_workers = 2 if sys.platform != "darwin" else 0
+            num_workers = 2 if sys.platform == "linux" else 0
             transforms = Compose([AddChanneld(KEYS), SpatialPadd(KEYS, (150, 153)), extra_transform])
             num_invertible_transforms = sum(1 for i in transforms.transforms if isinstance(i, InvertibleTransform))
@@ Expand Down @@

-Original file line number
+Diff line change
@@ -0,0 +1,40 @@
+    # Copyright 2020 - 2021 MONAI Consortium
+    # Licensed under the Apache License, Version 2.0 (the "License");
+    # you may not use this file except in compliance with the License.
+    # You may obtain a copy of the License at
+    #     http://www.apache.org/licenses/LICENSE-2.0
+    # Unless required by applicable law or agreed to in writing, software
+    # distributed under the License is distributed on an "AS IS" BASIS,
+    # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    # See the License for the specific language governing permissions and
+    # limitations under the License.
+    import unittest
+    import numpy as np
+    from parameterized import parameterized
+    from monai.data import resample_datalist
+    TEST_CASE_1 = [
+        {"data": [1, 2, 3, 4, 5], "factor": 2.5, "random_pick": True, "seed": 123},
+        [1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 2, 4, 5],
+    ]
+    TEST_CASE_2 = [
+        {"data": [1, 2, 3, 4, 5], "factor": 2.5, "random_pick": False, "seed": 0},
+        [1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3],
+    ]
+    TEST_CASE_3 = [{"data": [1, 2, 3, 4, 5], "factor": 0.6, "random_pick": True, "seed": 123}, [2, 4, 5]]
+    class TestResampleDatalist(unittest.TestCase):
+        @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
+        def test_value_shape(self, input_param, expected):
+            result = resample_datalist(**input_param)
+            np.testing.assert_allclose(result, expected)
+    if __name__ == "__main__":
+        unittest.main()

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

3174 Add support to scale datalist #3175

Uh oh!

Diff view

Diff view

There are no files selected for viewing