diff --git a/.all-contributorsrc b/.all-contributorsrc index 10de85ad..ae5374d3 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -31,7 +31,7 @@ "research", "ideas", "talk", - "doc", + "doc" ] }, { @@ -107,6 +107,16 @@ "mentoring", "review" ] + }, + { + "login": "mradamcox", + "name": "Adam Cox", + "avatar_url": "https://avatars.githubusercontent.com/u/10427268?v=4", + "profile": "http://mradamcox.github.io", + "contributions": [ + "code", + "test" + ] } ], "contributorsPerLine": 7, @@ -114,5 +124,5 @@ "repoType": "github", "repoHost": "https://github.com", "projectName": "MapReader", - "projectOwner": "Living-with-machines" + "projectOwner": "maps-as-data" } diff --git a/.github/workflows/mr_ci.yml b/.github/workflows/mr_ci.yml index fef6e30b..0f4462bd 100644 --- a/.github/workflows/mr_ci.yml +++ b/.github/workflows/mr_ci.yml @@ -1,7 +1,10 @@ --- name: Units Tests -on: [push] +on: + pull_request: + branches: + - main jobs: diff --git a/.github/workflows/mr_ci_text_spotting.yml b/.github/workflows/mr_ci_text_spotting.yml index d321d7bc..43bc3f1d 100644 --- a/.github/workflows/mr_ci_text_spotting.yml +++ b/.github/workflows/mr_ci_text_spotting.yml @@ -1,7 +1,10 @@ --- name: Units Tests - Text Spotting -on: [push] +on: + pull_request: + branches: + - main # Run linter with github actions for quick feedbacks. 
jobs: diff --git a/.github/workflows/publish-to-conda-forge.yml b/.github/workflows/publish-to-conda-forge.yml index e5950501..9b177cc3 100644 --- a/.github/workflows/publish-to-conda-forge.yml +++ b/.github/workflows/publish-to-conda-forge.yml @@ -4,6 +4,8 @@ name: Publish to Conda Forge on: workflow_dispatch: push: + branches: + - main tags: - v* diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 502489b9..0ac400e6 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -4,6 +4,8 @@ name: Publish Tagged Python 🐍 distributions 📦 to PyPI on: workflow_dispatch: push: + branches: + - main tags: - v* diff --git a/CHANGELOG.md b/CHANGELOG.md index cabd1d60..6c8d381d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ _Add new changes here_ - Added ability to save and reload text predictions ([#536](https://github.com/maps-as-data/MapReader/pull/536) - Added minimal dataclasses for text predictions ([#536](https://github.com/maps-as-data/MapReader/pull/536) +- Added `skip_blank_patches` argument to `MapImages.patchify_all()` ([#540](https://github.com/maps-as-data/MapReader/pull/540)) ## [v1.6.1](https://github.com/Living-with-machines/MapReader/releases/tag/v1.6.1) (2024-11-18) diff --git a/README.md b/README.md index b9bf1344..74b8d7b5 100644 --- a/README.md +++ b/README.md @@ -115,16 +115,17 @@ Maps above reproduced with the permission of the National Library of Scotland ht - - - - - + + + + + - + - + +
Katie McDonough
Katie McDonough

🔬 🤔 📖 📋 📆 👀 📢
Daniel C.S. Wilson
Daniel C.S. Wilson

🔬 🤔 📢 📖 📋
Kasra Hosseini
Kasra Hosseini

💻 🤔 🔬 👀 📢
Rosie Wood
Rosie Wood

💻 📖 🤔 📢 👀 🚧 🔬
Kalle Westerling
Kalle Westerling

💻 📖 🚧 👀 📢
Katie McDonough
Katie McDonough

🔬 🤔 📖 📆 👀 📢
Daniel C.S. Wilson
Daniel C.S. Wilson

🔬 🤔 📢 📖
Kasra Hosseini
Kasra Hosseini

💻 🤔 🔬 👀 📢
Rosie Wood
Rosie Wood

💻 📖 🤔 📢 👀 🚧 🔬
Kalle Westerling
Kalle Westerling

💻 📖 🚧 👀 📢
Chris Fleet
Chris Fleet

🔣
Kaspar Beelen
Kaspar Beelen

🤔 👀 🔬
Kaspar Beelen
Kaspar Beelen

🤔 👀 🔬
Andy Smith
Andy Smith

💻 📖 🧑‍🏫 👀
Andy Smith
Andy Smith

💻 📖 🧑‍🏫 👀
Adam Cox
Adam Cox

💻 ⚠️
diff --git a/docs/source/using-mapreader/step-by-step-guide/2-load.rst b/docs/source/using-mapreader/step-by-step-guide/2-load.rst index b3662a41..7e0c802c 100644 --- a/docs/source/using-mapreader/step-by-step-guide/2-load.rst +++ b/docs/source/using-mapreader/step-by-step-guide/2-load.rst @@ -211,6 +211,7 @@ This will create 1024 x 1024 pixel patches with 10% overlap between each patch. - ``square_cuts`` - This is a deprecated method and no longer recommended for use. By default, this is set to ``False`` and padding is added to patches at the edges of the parent image to ensure square patches. If you set ``square_cuts=True``, instead of padding, there will be some overlap between edge patches. - ``add_to_parent`` - By default, this is set to ``True`` so that each time you run ``patchify_all`` your patches are added to your ``MapImages`` object. Setting it to ``False`` (by specifying ``add_to_parent=False``) will mean your patches are created, but not added to your ``MapImages`` object. This can be useful for testing out different patch sizes. - ``rewrite`` - By default, this is set to ``False`` so that if your patches already exist they are not overwritten. Setting it to ``True`` (by specifying ``rewrite=True``) will mean already existing patches are recreated and overwritten. + - ``skip_blank_patches`` - By default, this is set to ``False``. Setting it to ``True`` will omit any patches that only contain ``0`` values, which can speed up processing on irregularly shaped map images that have empty regions. The `Image.getbbox() <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.getbbox>`_ method is used to determine if a patch is blank. If you would like to save your patches as geo-referenced tiffs (i.e. 
geotiffs), use: diff --git a/mapreader/load/images.py b/mapreader/load/images.py index d7485254..71bc1c7e 100644 --- a/mapreader/load/images.py +++ b/mapreader/load/images.py @@ -1208,6 +1208,7 @@ def patchify_all( patch_size: int | None = 100, tree_level: str | None = "parent", path_save: str | None = None, + skip_blank_patches: bool = False, add_to_parents: bool | None = True, square_cuts: bool | None = False, resize_factor: bool | None = False, @@ -1234,6 +1235,8 @@ def patchify_all( Directory to save the patches. If None, will be set as f"patches_{patch_size}_{method}" (e.g. "patches_100_pixel"). By default None. + skip_blank_patches : bool, optional + If True, any patch that only contains 0 values will be skipped, by default ``False``. Uses ``PIL.Image.Image.getbbox()``. add_to_parents : bool, optional If True, patches will be added to the MapImages instance's ``images`` dictionary, by default ``True``. @@ -1313,6 +1316,7 @@ def patchify_all( add_to_parents=add_to_parents, resize_factor=resize_factor, output_format=output_format, + skip_blank_patches=skip_blank_patches, rewrite=rewrite, verbose=verbose, overlap=overlap, @@ -1326,6 +1330,7 @@ def _patchify_by_pixel( add_to_parents: bool | None = True, resize_factor: bool | None = False, output_format: str | None = "png", + skip_blank_patches: bool = False, rewrite: bool | None = False, verbose: bool | None = False, overlap: int | None = 0, @@ -1347,6 +1352,8 @@ def _patchify_by_pixel( If True, resize the images before patchifying, by default ``False``. output_format : str, optional Format to use when writing image files, by default ``"png"``. + skip_blank_patches : bool, optional + If True, any patch that only contains 0 values will be skipped, by default ``False``. Uses ``PIL.Image.Image.getbbox()``. rewrite : bool, optional If True, existing patches will be rewritten, by default ``False``. 
verbose : bool, optional @@ -1370,6 +1377,7 @@ def _patchify_by_pixel( ) height, width = img.height, img.width + overlap_pixels = int(patch_size * overlap) x = 0 while x < width: @@ -1389,6 +1397,15 @@ def _patchify_by_pixel( else: patch = img.crop((x, y, max_x, max_y)) + + # skip if blank and don't add to parents + if skip_blank_patches and patch.getbbox() is None: + self._print_if_verbose( + f"[INFO] Skipping empty patch: {patch_id}.", verbose + ) + y = y + patch_size - overlap_pixels + continue + if max_x == width: patch = ImageOps.pad( patch, (patch_size, patch.height), centering=(0, 0) @@ -1416,7 +1433,6 @@ def _patchify_by_pixel( self._add_patch_coords_id(patch_id) self._add_patch_polygons_id(patch_id) - overlap_pixels = int(patch_size * overlap) y = y + patch_size - overlap_pixels x = x + patch_size - overlap_pixels diff --git a/tests/sample_files/cropped_blank_corners_rgb.tif b/tests/sample_files/cropped_blank_corners_rgb.tif new file mode 100644 index 00000000..06334ebe Binary files /dev/null and b/tests/sample_files/cropped_blank_corners_rgb.tif differ diff --git a/tests/sample_files/cropped_blank_corners_rgba.tif b/tests/sample_files/cropped_blank_corners_rgba.tif new file mode 100644 index 00000000..93842095 Binary files /dev/null and b/tests/sample_files/cropped_blank_corners_rgba.tif differ diff --git a/tests/test_load/test_images.py b/tests/test_load/test_images.py index 0d2061f8..832fbcef 100644 --- a/tests/test_load/test_images.py +++ b/tests/test_load/test_images.py @@ -515,6 +515,26 @@ def test_patchify_pixels(sample_dir, image_id, tmp_path): assert os.path.isfile(f"{tmp_path}/patch-0-0-3-3-#{image_id}#.png") +def test_patchify_pixels_skip_blank_rgb(sample_dir, tmp_path): + maps = MapImages(f"{sample_dir}/cropped_blank_corners_rgb.tif") + maps.patchify_all(patch_size=3, path_save=tmp_path, skip_blank_patches=True) + parent_list = maps.list_parents() + patch_list = maps.list_patches() + assert len(parent_list) == 1 + assert len(patch_list) == 5 + 
assert os.path.isfile(f"{tmp_path}/patch-0-3-3-6-#cropped_blank_corners_rgb.tif#.png") + + +def test_patchify_pixels_skip_blank_rgba(sample_dir, tmp_path): + maps = MapImages(f"{sample_dir}/cropped_blank_corners_rgba.tif") + maps.patchify_all(patch_size=3, path_save=tmp_path, skip_blank_patches=True) + parent_list = maps.list_parents() + patch_list = maps.list_patches() + assert len(parent_list) == 1 + assert len(patch_list) == 5 + assert os.path.isfile(f"{tmp_path}/patch-0-3-3-6-#cropped_blank_corners_rgba.tif#.png") + + def test_patchify_pixels_square(sample_dir, image_id, tmp_path): maps = MapImages(f"{sample_dir}/{image_id}") maps.patchify_all(patch_size=5, path_save=f"{tmp_path}_square", square_cuts=True)