diff --git a/.all-contributorsrc b/.all-contributorsrc
index 10de85ad..ae5374d3 100644
--- a/.all-contributorsrc
+++ b/.all-contributorsrc
@@ -31,7 +31,7 @@
"research",
"ideas",
"talk",
- "doc",
+ "doc"
]
},
{
@@ -107,6 +107,16 @@
"mentoring",
"review"
]
+ },
+ {
+ "login": "mradamcox",
+ "name": "Adam Cox",
+ "avatar_url": "https://avatars.githubusercontent.com/u/10427268?v=4",
+ "profile": "http://mradamcox.github.io",
+ "contributions": [
+ "code",
+ "test"
+ ]
}
],
"contributorsPerLine": 7,
@@ -114,5 +124,5 @@
"repoType": "github",
"repoHost": "https://github.com",
"projectName": "MapReader",
- "projectOwner": "Living-with-machines"
+ "projectOwner": "maps-as-data"
}
diff --git a/.github/workflows/mr_ci.yml b/.github/workflows/mr_ci.yml
index fef6e30b..0f4462bd 100644
--- a/.github/workflows/mr_ci.yml
+++ b/.github/workflows/mr_ci.yml
@@ -1,7 +1,10 @@
---
name: Units Tests
-on: [push]
+on:
+ pull_request:
+ branches:
+ - main
jobs:
diff --git a/.github/workflows/mr_ci_text_spotting.yml b/.github/workflows/mr_ci_text_spotting.yml
index d321d7bc..43bc3f1d 100644
--- a/.github/workflows/mr_ci_text_spotting.yml
+++ b/.github/workflows/mr_ci_text_spotting.yml
@@ -1,7 +1,10 @@
---
name: Units Tests - Text Spotting
-on: [push]
+on:
+ pull_request:
+ branches:
+ - main
# Run linter with github actions for quick feedbacks.
jobs:
diff --git a/.github/workflows/publish-to-conda-forge.yml b/.github/workflows/publish-to-conda-forge.yml
index e5950501..9b177cc3 100644
--- a/.github/workflows/publish-to-conda-forge.yml
+++ b/.github/workflows/publish-to-conda-forge.yml
@@ -4,6 +4,8 @@ name: Publish to Conda Forge
on:
workflow_dispatch:
push:
+ branches:
+ - main
tags:
- v*
diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml
index 502489b9..0ac400e6 100644
--- a/.github/workflows/publish-to-pypi.yml
+++ b/.github/workflows/publish-to-pypi.yml
@@ -4,6 +4,8 @@ name: Publish Tagged Python 🐍 distributions 📦 to PyPI
on:
workflow_dispatch:
push:
+ branches:
+ - main
tags:
- v*
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cabd1d60..6c8d381d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,7 @@ _Add new changes here_
- Added ability to save and reload text predictions ([#536](https://github.com/maps-as-data/MapReader/pull/536))
- Added minimal dataclasses for text predictions ([#536](https://github.com/maps-as-data/MapReader/pull/536))
+- Added `skip_blank_patches` argument to `MapImages.patchify_all()` ([#540](https://github.com/maps-as-data/MapReader/pull/540))
## [v1.6.1](https://github.com/Living-with-machines/MapReader/releases/tag/v1.6.1) (2024-11-18)
diff --git a/README.md b/README.md
index b9bf1344..74b8d7b5 100644
--- a/README.md
+++ b/README.md
@@ -115,16 +115,17 @@ Maps above reproduced with the permission of the National Library of Scotland ht
diff --git a/docs/source/using-mapreader/step-by-step-guide/2-load.rst b/docs/source/using-mapreader/step-by-step-guide/2-load.rst
index b3662a41..7e0c802c 100644
--- a/docs/source/using-mapreader/step-by-step-guide/2-load.rst
+++ b/docs/source/using-mapreader/step-by-step-guide/2-load.rst
@@ -211,6 +211,7 @@ This will create 1024 x 1024 pixel patches with 10% overlap between each patch.
- ``square_cuts`` - This is a deprecated method and no longer recommended for use. By default, this is set to ``False`` and padding is added to patches at the edges of the parent image to ensure square patches. If you set ``square_cuts=True``, instead of padding, there will be some overlap between edge patches.
- ``add_to_parents`` - By default, this is set to ``True`` so that each time you run ``patchify_all`` your patches are added to your ``MapImages`` object. Setting it to ``False`` (by specifying ``add_to_parents=False``) will mean your patches are created, but not added to your ``MapImages`` object. This can be useful for testing out different patch sizes.
- ``rewrite`` - By default, this is set to ``False`` so that if your patches already exist they are not overwritten. Setting it to ``True`` (by specifying ``rewrite=True``) will mean already existing patches are recreated and overwritten.
+ - ``skip_blank_patches`` - By default, this is set to ``False``. Setting it to ``True`` (by specifying ``skip_blank_patches=True``) will omit any patches that only contain ``0`` values, which can speed up processing on irregularly shaped map images that have empty regions. The `Image.getbbox() <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.getbbox>`_ method is used to determine if a patch is blank.
If you would like to save your patches as geo-referenced tiffs (i.e. geotiffs), use:
diff --git a/mapreader/load/images.py b/mapreader/load/images.py
index d7485254..71bc1c7e 100644
--- a/mapreader/load/images.py
+++ b/mapreader/load/images.py
@@ -1208,6 +1208,7 @@ def patchify_all(
patch_size: int | None = 100,
tree_level: str | None = "parent",
path_save: str | None = None,
+ skip_blank_patches: bool = False,
add_to_parents: bool | None = True,
square_cuts: bool | None = False,
resize_factor: bool | None = False,
@@ -1234,6 +1235,8 @@ def patchify_all(
Directory to save the patches.
If None, will be set as f"patches_{patch_size}_{method}" (e.g. "patches_100_pixel").
By default None.
+ skip_blank_patches : bool, optional
+ If True, any patch that only contains 0 values will be skipped, by default ``False``. Blank patches are detected with ``PIL.Image.Image.getbbox()``.
add_to_parents : bool, optional
If True, patches will be added to the MapImages instance's
``images`` dictionary, by default ``True``.
@@ -1313,6 +1316,7 @@ def patchify_all(
add_to_parents=add_to_parents,
resize_factor=resize_factor,
output_format=output_format,
+ skip_blank_patches=skip_blank_patches,
rewrite=rewrite,
verbose=verbose,
overlap=overlap,
@@ -1326,6 +1330,7 @@ def _patchify_by_pixel(
add_to_parents: bool | None = True,
resize_factor: bool | None = False,
output_format: str | None = "png",
+ skip_blank_patches: bool = False,
rewrite: bool | None = False,
verbose: bool | None = False,
overlap: int | None = 0,
@@ -1347,6 +1352,8 @@ def _patchify_by_pixel(
If True, resize the images before patchifying, by default ``False``.
output_format : str, optional
Format to use when writing image files, by default ``"png"``.
+ skip_blank_patches : bool, optional
+ If True, any patch that only contains 0 values will be skipped, by default ``False``. Blank patches are detected with ``PIL.Image.Image.getbbox()``.
rewrite : bool, optional
If True, existing patches will be rewritten, by default ``False``.
verbose : bool, optional
@@ -1370,6 +1377,7 @@ def _patchify_by_pixel(
)
height, width = img.height, img.width
+ overlap_pixels = int(patch_size * overlap)
x = 0
while x < width:
@@ -1389,6 +1397,15 @@ def _patchify_by_pixel(
else:
patch = img.crop((x, y, max_x, max_y))
+
+ # skip if blank and don't add to parents
+ if skip_blank_patches and patch.getbbox() is None:
+ self._print_if_verbose(
+ f"[INFO] Skipping empty patch: {patch_id}.", verbose
+ )
+ y = y + patch_size - overlap_pixels
+ continue
+
if max_x == width:
patch = ImageOps.pad(
patch, (patch_size, patch.height), centering=(0, 0)
@@ -1416,7 +1433,6 @@ def _patchify_by_pixel(
self._add_patch_coords_id(patch_id)
self._add_patch_polygons_id(patch_id)
- overlap_pixels = int(patch_size * overlap)
y = y + patch_size - overlap_pixels
x = x + patch_size - overlap_pixels
diff --git a/tests/sample_files/cropped_blank_corners_rgb.tif b/tests/sample_files/cropped_blank_corners_rgb.tif
new file mode 100644
index 00000000..06334ebe
Binary files /dev/null and b/tests/sample_files/cropped_blank_corners_rgb.tif differ
diff --git a/tests/sample_files/cropped_blank_corners_rgba.tif b/tests/sample_files/cropped_blank_corners_rgba.tif
new file mode 100644
index 00000000..93842095
Binary files /dev/null and b/tests/sample_files/cropped_blank_corners_rgba.tif differ
diff --git a/tests/test_load/test_images.py b/tests/test_load/test_images.py
index 0d2061f8..832fbcef 100644
--- a/tests/test_load/test_images.py
+++ b/tests/test_load/test_images.py
@@ -515,6 +515,26 @@ def test_patchify_pixels(sample_dir, image_id, tmp_path):
assert os.path.isfile(f"{tmp_path}/patch-0-0-3-3-#{image_id}#.png")
+def test_patchify_pixels_skip_blank_rgb(sample_dir, tmp_path):
+ maps = MapImages(f"{sample_dir}/cropped_blank_corners_rgb.tif")
+ maps.patchify_all(patch_size=3, path_save=tmp_path, skip_blank_patches=True)
+ parent_list = maps.list_parents()
+ patch_list = maps.list_patches()
+ assert len(parent_list) == 1
+ assert len(patch_list) == 5
+ assert os.path.isfile(f"{tmp_path}/patch-0-3-3-6-#cropped_blank_corners_rgb.tif#.png")
+
+
+def test_patchify_pixels_skip_blank_rgba(sample_dir, tmp_path):
+ maps = MapImages(f"{sample_dir}/cropped_blank_corners_rgba.tif")
+ maps.patchify_all(patch_size=3, path_save=tmp_path, skip_blank_patches=True)
+ parent_list = maps.list_parents()
+ patch_list = maps.list_patches()
+ assert len(parent_list) == 1
+ assert len(patch_list) == 5
+ assert os.path.isfile(f"{tmp_path}/patch-0-3-3-6-#cropped_blank_corners_rgba.tif#.png")
+
+
def test_patchify_pixels_square(sample_dir, image_id, tmp_path):
maps = MapImages(f"{sample_dir}/{image_id}")
maps.patchify_all(patch_size=5, path_save=f"{tmp_path}_square", square_cuts=True)