diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d43cfcc8..ce99d549a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ ### Fixed +- `generate_subcatalogs` can include multiple template values in a single subfolder layer + ([#595](https://github.com/stac-utils/pystac/pull/595)) - Avoid implicit re-exports ([#591](https://github.com/stac-utils/pystac/pull/591)) ### Deprecated diff --git a/docs/concepts.rst b/docs/concepts.rst index 0fe9a9288..6db632b9e 100644 --- a/docs/concepts.rst +++ b/docs/concepts.rst @@ -69,7 +69,7 @@ To use them you can pass in a strategy to the normalize_hrefs call. Using templates ''''''''''''''' -You can utilze template strings to determine the file paths of HREFs set on Catalogs, +You can utilize template strings to determine the file paths of HREFs set on Catalogs, Collection or Items. These templates use python format strings, which can name the property or attribute of the item you want to use for replacing the template variable. For example: @@ -84,7 +84,9 @@ variable. For example: The above code will save items in subfolders based on the collection ID, year and month of it's datetime (or start_datetime if a date range is defined and no datetime is -defined). +defined). Note that the forward slash (``/``) should be used as the path separator in the +template string regardless of the system path separator (that is, in both POSIX-compliant +and Windows environments). You can use dot notation to specify attributes of objects or keys in dictionaries for template variables. 
PySTAC will look at the object, it's ``properties`` and its diff --git a/pystac/catalog.py b/pystac/catalog.py index abbfbf0de..a76ffef38 100644 --- a/pystac/catalog.py +++ b/pystac/catalog.py @@ -666,13 +666,10 @@ def generate_subcatalogs( for link in item_links: link.resolve_stac_object(root=self.get_root()) item = cast(pystac.Item, link.target) - item_parts = layout_template.get_template_values(item) + subcat_ids = layout_template.substitute(item).split("/") id_iter = reversed(parent_ids) if all( - [ - "{}".format(id) == next(id_iter, None) - for id in reversed(list(item_parts.values())) - ] + ["{}".format(id) == next(id_iter, None) for id in reversed(subcat_ids)] ): # Skip items for which the sub-catalog structure already # matches the template. The list of parent IDs can include more @@ -680,12 +677,11 @@ def generate_subcatalogs( keep_item_links.append(link) continue curr_parent = self - for k, v in item_parts.items(): - subcat_id = "{}".format(v) + for subcat_id in subcat_ids: subcat = curr_parent.get_child(subcat_id) if subcat is None: - subcat_desc = "Catalog of items from {} with {} of {}".format( - curr_parent.id, k, v + subcat_desc = "Catalog of items from {} with id {}".format( + curr_parent.id, subcat_id ) subcat = pystac.Catalog(id=subcat_id, description=subcat_desc) curr_parent.add_child(subcat) diff --git a/pystac/layout.py b/pystac/layout.py index 5ed73ad94..ccc87d407 100644 --- a/pystac/layout.py +++ b/pystac/layout.py @@ -54,6 +54,10 @@ class LayoutTemplate: | ``collection`` | The collection ID of an Item's collection. | +--------------------+--------------------------------------------------------+ + The forward slash (``/``) should be used as path separator in the template + string regardless of the system path separator (thus both in POSIX-compliant + and Windows environments). 
+ Examples:: # Uses the year, month and day of the item diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 03d05b390..d75051747 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -413,6 +413,30 @@ def test_generate_subcatalogs_does_not_change_item_count(self) -> None: actual, expected, msg=" for child '{}'".format(child.id) ) + def test_generate_subcatalogs_merge_template_elements(self) -> None: + catalog = Catalog(id="test", description="Test") + item_properties = [ + dict(property1=p1, property2=p2) for p1 in ("A", "B") for p2 in (1, 2) + ] + for ni, properties in enumerate(item_properties): + catalog.add_item( + Item( + id="item{}".format(ni), + geometry=ARBITRARY_GEOM, + bbox=ARBITRARY_BBOX, + datetime=datetime.utcnow(), + properties=properties, + ) + ) + result = catalog.generate_subcatalogs("${property1}_${property2}") + + actual_subcats = set([cat.id for cat in result]) + expected_subcats = set( + ["{}_{}".format(d["property1"], d["property2"]) for d in item_properties] + ) + self.assertEqual(len(result), len(expected_subcats)) + self.assertSetEqual(actual_subcats, expected_subcats) + def test_generate_subcatalogs_can_be_applied_multiple_times(self) -> None: catalog = TestCases.test_case_8() @@ -511,9 +535,7 @@ def test_generate_subcatalogs_works_for_subcatalogs_with_same_ids(self) -> None: ) ) - result = catalog.generate_subcatalogs( - join_path_or_url(JoinType.PATH, "${property1}", "${property2}") - ) + result = catalog.generate_subcatalogs("${property1}/${property2}") self.assertEqual(len(result), 6) catalog.normalize_hrefs("/")