From 3b9b5bbd119646ddd2d90122ff28d1a93f0f587b Mon Sep 17 00:00:00 2001 From: Grigory Pomadchin Date: Tue, 9 Nov 2021 12:03:40 -0500 Subject: [PATCH 01/12] Add an extra SELF_CONTAINED catalog test --- tests/test_catalog.py | 17 +++++++++++++++++ tests/utils/test_cases.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index d75051747..7b03475bf 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -837,6 +837,23 @@ def check_all_absolute(cat: Catalog) -> None: c2.catalog_type = CatalogType.ABSOLUTE_PUBLISHED check_all_absolute(c2) + def test_self_contained_catalog_collection_item_links(self) -> None: + with tempfile.TemporaryDirectory() as tmp_dir: + cat = TestCases.test_case_9() + cat.normalize_hrefs(tmp_dir) + cat.validate_all() + print(f'Saving catalog to: {tmp_dir}') + cat.save(catalog_type=CatalogType.SELF_CONTAINED) + + # read the item back, to make experiment clean + item = Item.from_file(f'{tmp_dir}/collection-issue-657/item-issue-657/item-issue-657.json') + # ensure that all links in the output json are relative + # everything of a nested level > 2 is not relative? + for link in item.links: + self.assertFalse(is_absolute_href(link.target)) + # use time sleep to pause there and inspect catalogs manually + # time.sleep(10000) + def test_full_copy_and_normalize_works_with_created_stac(self) -> None: cat = TestCases.test_case_3() cat_copy = cat.full_copy() diff --git a/tests/utils/test_cases.py b/tests/utils/test_cases.py index c9b8934b1..90b7110e9 100644 --- a/tests/utils/test_cases.py +++ b/tests/utils/test_cases.py @@ -242,3 +242,32 @@ def test_case_8() -> Collection: "data-files/catalogs/" "planet-example-v1.0.0-beta.2/collection.json" ) ) + + @staticmethod + def test_case_9() -> Collection: + """Manually created STAC Catalog, see issue https://github.com/stac-utils/pystac/issues/657""" + catalog = pystac.Catalog(id="catalog-issue-657", description="catalog-issue-657") + collection = pystac.Collection( + "collection-issue-657", + "collection-issue-657", + pystac.Extent( + spatial=pystac.SpatialExtent([[-180, -90, 180, 90]]), + temporal=pystac.TemporalExtent( + [[datetime(2021,11,1), None]] + ), + ), + license='proprietary' + ) + + item = pystac.Item( + id="item-issue-657", + stac_extensions=[], + geometry=ARBITRARY_GEOM, + bbox=ARBITRARY_BBOX, + datetime=datetime(2021,11,1), + properties={} + ) + + collection.add_item(item) + catalog.add_child(collection) + return catalog From 17f2d59fdd95de2b81ce5cdb2b33d59a07f81c5a Mon Sep 17 00:00:00 2001 From: Grigory Pomadchin Date: Tue, 9 Nov 2021 15:21:17 -0500 Subject: [PATCH 02/12] Improve unit test --- tests/test_catalog.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 7b03475bf..0603bc298 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -850,7 +850,9 @@ def test_self_contained_catalog_collection_item_links(self) -> None: # ensure that all links in the output json are relative # everything of a nested level > 2 is not relative? for link in item.links: - self.assertFalse(is_absolute_href(link.target)) + href = link.get_href() + self.assertIsNotNone(href) + self.assertFalse(is_absolute_href(href)) # use time sleep to pause there and inspect catalogs manually # time.sleep(10000) From e71add51fa88a2c2872512a589c4e70960adb7f6 Mon Sep 17 00:00:00 2001 From: Grigory Pomadchin Date: Tue, 9 Nov 2021 21:11:52 -0500 Subject: [PATCH 03/12] Run pre-commit --- tests/test_catalog.py | 6 ++++-- tests/utils/test_cases.py | 18 +++++++++--------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 0603bc298..cd2c0f083 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -842,11 +842,13 @@ def test_self_contained_catalog_collection_item_links(self) -> None: cat = TestCases.test_case_9() cat.normalize_hrefs(tmp_dir) cat.validate_all() - print(f'Saving catalog to: {tmp_dir}') + print(f"Saving catalog to: {tmp_dir}") cat.save(catalog_type=CatalogType.SELF_CONTAINED) # read the item back, to make experiment clean - item = Item.from_file(f'{tmp_dir}/collection-issue-657/item-issue-657/item-issue-657.json') + item = Item.from_file( + f"{tmp_dir}/collection-issue-657/item-issue-657/item-issue-657.json" + ) # ensure that all links in the output json are relative # everything of a nested level > 2 is not relative? for link in item.links: diff --git a/tests/utils/test_cases.py b/tests/utils/test_cases.py index 90b7110e9..20a4ca18d 100644 --- a/tests/utils/test_cases.py +++ b/tests/utils/test_cases.py @@ -244,19 +244,19 @@ def test_case_8() -> Collection: ) @staticmethod - def test_case_9() -> Collection: + def test_case_9() -> Catalog: """Manually created STAC Catalog, see issue https://github.com/stac-utils/pystac/issues/657""" - catalog = pystac.Catalog(id="catalog-issue-657", description="catalog-issue-657") + catalog = pystac.Catalog( + id="catalog-issue-657", description="catalog-issue-657" + ) collection = pystac.Collection( "collection-issue-657", "collection-issue-657", pystac.Extent( - spatial=pystac.SpatialExtent([[-180, -90, 180, 90]]), - temporal=pystac.TemporalExtent( - [[datetime(2021,11,1), None]] - ), + spatial=pystac.SpatialExtent([[-180.0, -90.0, 180.0, 90.0]]), + temporal=pystac.TemporalExtent([[datetime(2021, 11, 1), None]]), ), - license='proprietary' + license="proprietary", ) item = pystac.Item( @@ -264,8 +264,8 @@ def test_case_9() -> Collection: stac_extensions=[], geometry=ARBITRARY_GEOM, bbox=ARBITRARY_BBOX, - datetime=datetime(2021,11,1), - properties={} + datetime=datetime(2021, 11, 1), + properties={}, ) collection.add_item(item) From d4560304dbc255f823608eb5238c675f89f6be31 Mon Sep 17 00:00:00 2001 From: Grigory Pomadchin Date: Wed, 10 Nov 2021 09:06:42 -0500 Subject: [PATCH 04/12] Make linter happy --- tests/test_catalog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index cd2c0f083..65a2d7d2e 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -853,7 +853,7 @@ def test_self_contained_catalog_collection_item_links(self) -> None: # everything of a nested level > 2 is not relative? for link in item.links: href = link.get_href() - self.assertIsNotNone(href) + assert href is not None self.assertFalse(is_absolute_href(href)) # use time sleep to pause there and inspect catalogs manually # time.sleep(10000) From 8012f9451c73db53c632925c21ffdbf4a84e6030 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Wed, 10 Nov 2021 14:47:17 -0500 Subject: [PATCH 05/12] Update test to avoid checking 'self' link 'self' link is always absolute; PySTAC avoids writing the link if the Catalog is self contained or relative published (besides the root catalog in the latter case). --- tests/test_catalog.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 65a2d7d2e..a10d45067 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -842,7 +842,7 @@ def test_self_contained_catalog_collection_item_links(self) -> None: cat = TestCases.test_case_9() cat.normalize_hrefs(tmp_dir) cat.validate_all() - print(f"Saving catalog to: {tmp_dir}") + cat.save(catalog_type=CatalogType.SELF_CONTAINED) # read the item back, to make experiment clean @@ -852,11 +852,15 @@ def test_self_contained_catalog_collection_item_links(self) -> None: # ensure that all links in the output json are relative # everything of a nested level > 2 is not relative? for link in item.links: + # self links are always absolute + if link.rel == "self": + continue + href = link.get_href() assert href is not None - self.assertFalse(is_absolute_href(href)) - # use time sleep to pause there and inspect catalogs manually - # time.sleep(10000) + self.assertFalse( + is_absolute_href(href), msg=f"Link with rel={link.rel} is absolute!" + ) def test_full_copy_and_normalize_works_with_created_stac(self) -> None: cat = TestCases.test_case_3() From 0d577f6048da5e2846c9e0e44a33958d7c6941f2 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Wed, 10 Nov 2021 15:17:57 -0500 Subject: [PATCH 06/12] Inline test issue-specific catalog; check raw json --- tests/test_catalog.py | 52 ++++++++++++++++++++++++++++----------- tests/utils/test_cases.py | 29 ---------------------- 2 files changed, 38 insertions(+), 43 deletions(-) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index a10d45067..c18742e6f 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -838,28 +838,52 @@ def check_all_absolute(cat: Catalog) -> None: check_all_absolute(c2) def test_self_contained_catalog_collection_item_links(self) -> None: + """See issue https://github.com/stac-utils/pystac/issues/657""" with tempfile.TemporaryDirectory() as tmp_dir: - cat = TestCases.test_case_9() - cat.normalize_hrefs(tmp_dir) - cat.validate_all() + catalog = pystac.Catalog( + id="catalog-issue-657", description="catalog-issue-657" + ) + collection = pystac.Collection( + "collection-issue-657", + "collection-issue-657", + pystac.Extent( + spatial=pystac.SpatialExtent([[-180.0, -90.0, 180.0, 90.0]]), + temporal=pystac.TemporalExtent([[datetime(2021, 11, 1), None]]), + ), + license="proprietary", + ) - cat.save(catalog_type=CatalogType.SELF_CONTAINED) + item = pystac.Item( + id="item-issue-657", + stac_extensions=[], + geometry=ARBITRARY_GEOM, + bbox=ARBITRARY_BBOX, + datetime=datetime(2021, 11, 1), + properties={}, + ) + + collection.add_item(item) + catalog.add_child(collection) + + catalog.normalize_hrefs(tmp_dir) + catalog.validate_all() - # read the item back, to make experiment clean - item = Item.from_file( - f"{tmp_dir}/collection-issue-657/item-issue-657/item-issue-657.json" + catalog.save(catalog_type=CatalogType.SELF_CONTAINED) + item_json = json.loads( + open( + f"{tmp_dir}/collection-issue-657/item-issue-657/item-issue-657.json" + ).read() ) - # ensure that all links in the output json are relative - # everything of a nested level > 2 is not relative? - for link in item.links: + + for link in item_json["links"]: # self links are always absolute - if link.rel == "self": + if link["rel"] == "self": continue - href = link.get_href() - assert href is not None + href = link["href"] self.assertFalse( - is_absolute_href(href), msg=f"Link with rel={link.rel} is absolute!" + is_absolute_href(href), + msg=f"Link with rel={link['rel']} is absolute!", ) def test_full_copy_and_normalize_works_with_created_stac(self) -> None: diff --git a/tests/utils/test_cases.py b/tests/utils/test_cases.py index 20a4ca18d..c9b8934b1 100644 --- a/tests/utils/test_cases.py +++ b/tests/utils/test_cases.py @@ -242,32 +242,3 @@ def test_case_8() -> Collection: "data-files/catalogs/" "planet-example-v1.0.0-beta.2/collection.json" ) ) - - @staticmethod - def test_case_9() -> Catalog: - """Manually created STAC Catalog, see issue https://github.com/stac-utils/pystac/issues/657""" - catalog = pystac.Catalog( - id="catalog-issue-657", description="catalog-issue-657" - ) - collection = pystac.Collection( - "collection-issue-657", - "collection-issue-657", - pystac.Extent( - spatial=pystac.SpatialExtent([[-180.0, -90.0, 180.0, 90.0]]), - temporal=pystac.TemporalExtent([[datetime(2021, 11, 1), None]]), - ), - license="proprietary", - ) - - item = pystac.Item( - id="item-issue-657", - stac_extensions=[], - geometry=ARBITRARY_GEOM, - bbox=ARBITRARY_BBOX, - datetime=datetime(2021, 11, 1), - properties={}, - ) - - collection.add_item(item) - catalog.add_child(collection) - return catalog From 6ce1de216a06b48b7be3bfe6c7ea4e406dbbc99b Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Wed, 10 Nov 2021 15:18:32 -0500 Subject: [PATCH 07/12] Set root on resolved links when setting on Catalog When setting the root on a Catalog, also set the root on any child or item objects that are resolved. This handles the case where child items and catalogs were created, added to a catalog, and then that catalog added to another catalog, invalidating the originally set root links of the leaf items and catalogs. --- pystac/catalog.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pystac/catalog.py b/pystac/catalog.py index a76ffef38..411818bdc 100644 --- a/pystac/catalog.py +++ b/pystac/catalog.py @@ -187,6 +187,13 @@ def set_root(self, root: Optional["Catalog"]) -> None: root._resolved_objects, self._resolved_objects ) + # Walk through resolved object links and update the root + for link in self.links: + if link.rel == pystac.RelType.CHILD or link.rel == pystac.RelType.ITEM: + target = link.target + if isinstance(target, STACObject): + target.set_root(root) + def is_relative(self) -> bool: return self.catalog_type in [ CatalogType.RELATIVE_PUBLISHED, From 1ba0f56306b925ada4498530a2a82e6f9033c1b7 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Wed, 10 Nov 2021 16:19:36 -0500 Subject: [PATCH 08/12] Close file properly in test --- tests/test_catalog.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index c18742e6f..abc91f24a 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -869,11 +869,10 @@ def test_self_contained_catalog_collection_item_links(self) -> None: catalog.validate_all() catalog.save(catalog_type=CatalogType.SELF_CONTAINED) - item_json = json.loads( - open( - f"{tmp_dir}/collection-issue-657/item-issue-657/item-issue-657.json" - ).read() - ) + with open( + f"{tmp_dir}/collection-issue-657/item-issue-657/item-issue-657.json" + ) as f: + item_json = json.load(f) for link in item_json["links"]: # self links are always absolute From efce55c443ef0b121ff0372d6e298c0cf443fddc Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Wed, 10 Nov 2021 16:19:48 -0500 Subject: [PATCH 09/12] Message which catalog failed for better debugging --- tests/test_catalog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index abc91f24a..a25cb8005 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -1119,7 +1119,7 @@ def check_item(self, item: Item, tag: str) -> None: self.check_link(link, tag) def check_catalog(self, c: Catalog, tag: str) -> None: - self.assertEqual(len(c.get_links("root")), 1) + self.assertEqual(len(c.get_links("root")), 1, msg=f"{c}") for link in c.links: self.check_link(link, tag) From 9a88216f694bf8479d9344f5f1b41f43286ee5ff Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Wed, 10 Nov 2021 16:20:03 -0500 Subject: [PATCH 10/12] Don't recursively clear the root of children/items When clearing the root link of a Catalog, avoid clearing the root link of the children/items. This is to avoid the root link clearing in the "clone" method to avoid touching the un-cloned linked STAC Objects. --- pystac/catalog.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pystac/catalog.py b/pystac/catalog.py index 411818bdc..9f3c4dbb5 100644 --- a/pystac/catalog.py +++ b/pystac/catalog.py @@ -187,12 +187,12 @@ def set_root(self, root: Optional["Catalog"]) -> None: root._resolved_objects, self._resolved_objects ) - # Walk through resolved object links and update the root - for link in self.links: - if link.rel == pystac.RelType.CHILD or link.rel == pystac.RelType.ITEM: - target = link.target - if isinstance(target, STACObject): - target.set_root(root) + # Walk through resolved object links and update the root + for link in self.links: + if link.rel == pystac.RelType.CHILD or link.rel == pystac.RelType.ITEM: + target = link.target + if isinstance(target, STACObject): + target.set_root(root) def is_relative(self) -> bool: return self.catalog_type in [ From a9fb397301ae68415adf209247f0053f4c4f5809 Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Wed, 10 Nov 2021 16:21:50 -0500 Subject: [PATCH 11/12] Use STACObject.set_root in full_copy This avoids logic in the child class implementations that are incompatible with the full_copy logic, such as the Catalog's call of set_root on of child and item links. This crawling logic is handled by full_copy itself. --- pystac/stac_object.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pystac/stac_object.py b/pystac/stac_object.py index e7aeb5104..43e5f0c73 100644 --- a/pystac/stac_object.py +++ b/pystac/stac_object.py @@ -363,7 +363,11 @@ def full_copy( if root is None and isinstance(clone, pystac.Catalog): root = clone - clone.set_root(cast(pystac.Catalog, root)) + # Set the root of the STAC Object using the base class, + # avoiding child class overrides + # extra logic which can be incompatible with the full copy. + STACObject.set_root(clone, cast(pystac.Catalog, root)) + if parent: clone.set_parent(parent) From a6a58b35cba8dd6e92da5b02ca29def9aced70de Mon Sep 17 00:00:00 2001 From: Rob Emanuele Date: Wed, 10 Nov 2021 16:30:07 -0500 Subject: [PATCH 12/12] Update CHANGELOG --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e7e471ff..4f92175f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,9 +22,10 @@ ### Fixed -- `generate_subcatalogs` can include multiple template values in a single subfolder layer +- `generate_subcatalogs` can include multiple template values in a single subfolder layer ([#595](https://github.com/stac-utils/pystac/pull/595)) - Avoid implicit re-exports ([#591](https://github.com/stac-utils/pystac/pull/591)) +- Fix issue that caused incorrect root links when constructing multi-leveled catalogs ([#658](https://github.com/stac-utils/pystac/pull/658)) ### Deprecated