podaac · skorper · May 31, 2022 · May 27, 2022 · May 27, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 ### Added
+ - HARMONY-1178
+   - Handle paged STAC catalogs
 ### Changed 
 - Updated dependency versions
 ### Deprecated 

diff --git a/podaac/merger/harmony/service.py b/podaac/merger/harmony/service.py
@@ -9,7 +9,7 @@
 
 from harmony.adapter import BaseHarmonyAdapter
 from harmony.util import bbox_to_geometry, stage
-from pystac import Item
+from pystac import Catalog, Item
 from pystac.item import Asset
 
 from podaac.merger.merge import merge_netcdf_files
@@ -39,7 +39,7 @@ def invoke(self):
 
         return (self.message, self.process_catalog(self.catalog))
 
-    def process_catalog(self, catalog):
+    def process_catalog(self, catalog: Catalog):
         """
         Recursively process a catalog and all its children. Adapted from
         BaseHarmonyAdapter._process_catalog_recursive to specifically
@@ -57,13 +57,10 @@ def process_catalog(self, catalog):
         """
         result = catalog.clone()
         result.id = str(uuid4())
-
-        # Recursively process all sub-catalogs
-        children = catalog.get_children()
         result.clear_children()
-        result.add_children([self.process_catalog(child) for child in children])
 
-        items = list(catalog.get_items())
+        # Get all the items from the catalog, including from child or linked catalogs
+        items = list(self.get_all_catalog_items(catalog))
 
         # Quick return if catalog contains no items
         if len(items) == 0:

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -18,7 +18,7 @@ packages = [
 python = "^3.9"
 netCDF4 = "^1.5.6"
 numpy = "^1.20.3"
-harmony-service-lib = "^1.0.9"
+harmony-service-lib = "^1.0.16"
 importlib-metadata = "^4.8.1"
 
 [tool.poetry.dev-dependencies]

diff --git a/tests/data/harmony/source/catalog0.json b/tests/data/harmony/source/catalog0.json
@@ -0,0 +1,36 @@
+{
+  "stac_version": "1.0.0-beta.2",
+  "stac_extensions": [],
+  "id": "cfc32383-cfd1-4e43-8d5f-55f539b6fa59",
+  "links": [
+    {
+      "rel": "harmony_source",
+      "href": "https://cmr.uat.earthdata.nasa.gov/search/concepts/C1234088182-EEDTEST"
+    },
+    {
+      "rel": "item",
+      "href": "./granule_0_0_0000000.json",
+      "type": "application/json",
+      "title": "2020_01_01_7f00ff_global"
+    },
+    {
+      "rel": "item",
+      "href": "./granule_0_0_0000001.json",
+      "type": "application/json",
+      "title": "2020_01_02_3200ff_global"
+    },
+    {
+      "rel": "item",
+      "href": "./granule_0_0_0000002.json",
+      "type": "application/json",
+      "title": "2020_01_03_0019ff_global"
+    },
+    {
+      "rel": "next",
+      "href": "tests/data/harmony/source/catalog1.json",
+      "type": "application/json",
+      "title": "Next page"
+    }
+  ],
+  "description": "CMR Granules for C1234088182-EEDTEST batch 1"
+}
diff --git a/tests/data/harmony/source/catalog1.json b/tests/data/harmony/source/catalog1.json
@@ -0,0 +1,30 @@
+{
+  "stac_version": "1.0.0-beta.2",
+  "stac_extensions": [],
+  "id": "cfc32383-cfd1-4e43-8d5f-55f539b6fa59",
+  "links": [
+    {
+      "rel": "harmony_source",
+      "href": "https://cmr.uat.earthdata.nasa.gov/search/concepts/C1234088182-EEDTEST"
+    },
+    {
+      "rel": "item",
+      "href": "./granule_0_0_0000003.json",
+      "type": "application/json",
+      "title": "2020_01_04_0065ff_global"
+    },
+    {
+      "rel": "item",
+      "href": "./granule_0_0_0000004.json",
+      "type": "application/json",
+      "title": "2020_01_05_00b2ff_global"
+    },
+    {
+      "rel": "prev",
+      "href": "tests/data/harmony/source/catalog0.json",
+      "type": "application/json",
+      "title": "Previous page"
+    }
+  ],
+  "description": "CMR Granules for C1234088182-EEDTEST batch 2"
+}
diff --git a/tests/test_harmony.py b/tests/test_harmony.py
@@ -33,68 +33,71 @@ def test_service_invoke(self):
         in_message_data = in_message_path.read_text()
         in_message = json.loads(in_message_data)
 
-        in_catalog_path = self.__harmony_path.joinpath('source', 'catalog.json')
-
-        test_args = [
-            podaac.merger.harmony.cli.__file__,
-            '--harmony-action', 'invoke',
-            '--harmony-input', in_message_data,
-            '--harmony-source', str(in_catalog_path),
-            '--harmony-metadata-dir', str(self.__output_path),
-            '--harmony-data-location', self.__output_path.as_uri()
-        ]
-
-        test_env = {
-            'ENV': 'dev',
-            'OAUTH_CLIENT_ID': '',
-            'OAUTH_UID': '',
-            'OAUTH_PASSWORD': '',
-            'OAUTH_REDIRECT_URI': '',
-            'STAGING_PATH': '',
-            'STAGING_BUCKET': ''
-        }
-
-        with patch.object(sys, 'argv', test_args), patch.dict(environ, test_env):
-            podaac.merger.harmony.cli.main()
-
-        out_catalog_path = self.__output_path.joinpath('catalog.json')
-        out_catalog = json.loads(out_catalog_path.read_text())
-
-        item_meta = next(item for item in out_catalog['links'] if item['rel'] == 'item')
-        item_href = item_meta['href']
-        item_path = self.__output_path.joinpath(item_href).resolve()
-
-        # -- Item Verification --
-        item = json.loads(item_path.read_text())
-        properties = item['properties']
-
-        # Accumulation method checks
-        self.assertEqual(item['bbox'], [-4, -3, 4, 3])
-        self.assertEqual(properties['start_datetime'], '2020-01-01T00:00:00+00:00')
-        self.assertEqual(properties['end_datetime'], '2020-01-05T23:59:59+00:00')
-
-        # -- Asset Verification --
-        data = item['assets']['data']
-        collection_name = in_message['sources'][0]['collection']
-
-        # Sanity checks on metadata
-        self.assertTrue(data['href'].endswith(f'/{collection_name}_merged.nc4'))
-        self.assertEqual(data['title'], f'{collection_name}_merged.nc4')
-        self.assertEqual(data['type'], 'application/x-netcdf4')
-        self.assertEqual(data['roles'], ['data'])
-
-        # -- subset_files Verification --
-        file_list = [
-            '2020_01_01_7f00ff_global.nc',
-            '2020_01_02_3200ff_global.nc',
-            '2020_01_03_0019ff_global.nc',
-            '2020_01_04_0065ff_global.nc',
-            '2020_01_05_00b2ff_global.nc'
-        ]
-
-        path = urlsplit(data['href']).path
-        dataset = Dataset(path)
-        subset_files = dataset['subset_files'][:].tolist()
-        subset_files.sort()
-
-        self.assertEqual(file_list, subset_files)
+        # test with both paged catalogs and un-paged catalogs
+        for in_catalog_name in ['catalog.json', 'catalog0.json']:
+
+            in_catalog_path = self.__harmony_path.joinpath('source', in_catalog_name)
+
+            test_args = [
+                podaac.merger.harmony.cli.__file__,
+                '--harmony-action', 'invoke',
+                '--harmony-input', in_message_data,
+                '--harmony-source', str(in_catalog_path),
+                '--harmony-metadata-dir', str(self.__output_path),
+                '--harmony-data-location', self.__output_path.as_uri()
+            ]
+
+            test_env = {
+                'ENV': 'dev',
+                'OAUTH_CLIENT_ID': '',
+                'OAUTH_UID': '',
+                'OAUTH_PASSWORD': '',
+                'OAUTH_REDIRECT_URI': '',
+                'STAGING_PATH': '',
+                'STAGING_BUCKET': ''
+            }
+
+            with patch.object(sys, 'argv', test_args), patch.dict(environ, test_env):
+                podaac.merger.harmony.cli.main()
+
+            out_catalog_path = self.__output_path.joinpath('catalog.json')
+            out_catalog = json.loads(out_catalog_path.read_text())
+
+            item_meta = next(item for item in out_catalog['links'] if item['rel'] == 'item')
+            item_href = item_meta['href']
+            item_path = self.__output_path.joinpath(item_href).resolve()
+
+            # -- Item Verification --
+            item = json.loads(item_path.read_text())
+            properties = item['properties']
+
+            # Accumulation method checks
+            self.assertEqual(item['bbox'], [-4, -3, 4, 3])
+            self.assertEqual(properties['start_datetime'], '2020-01-01T00:00:00+00:00')
+            self.assertEqual(properties['end_datetime'], '2020-01-05T23:59:59+00:00')
+
+            # -- Asset Verification --
+            data = item['assets']['data']
+            collection_name = in_message['sources'][0]['collection']
+
+            # Sanity checks on metadata
+            self.assertTrue(data['href'].endswith(f'/{collection_name}_merged.nc4'))
+            self.assertEqual(data['title'], f'{collection_name}_merged.nc4')
+            self.assertEqual(data['type'], 'application/x-netcdf4')
+            self.assertEqual(data['roles'], ['data'])
+
+            # -- subset_files Verification --
+            file_list = [
+                '2020_01_01_7f00ff_global.nc',
+                '2020_01_02_3200ff_global.nc',
+                '2020_01_03_0019ff_global.nc',
+                '2020_01_04_0065ff_global.nc',
+                '2020_01_05_00b2ff_global.nc'
+            ]
+
+            path = urlsplit(data['href']).path
+            dataset = Dataset(path)
+            subset_files = dataset['subset_files'][:].tolist()
+            subset_files.sort()
+
+            self.assertEqual(file_list, subset_files)