From 571d760769620e9d1382a4ee539a9a10cac2ba5d Mon Sep 17 00:00:00 2001
From: Simon Liu
Date: Mon, 6 Nov 2023 10:12:05 -0800
Subject: [PATCH 1/3] update add collection test to get the url for json history

---
 add_collection_test.py | 38 ++++++++++++--------------------------
 1 file changed, 12 insertions(+), 26 deletions(-)

diff --git a/add_collection_test.py b/add_collection_test.py
index cd960de1..d0a029bf 100644
--- a/add_collection_test.py
+++ b/add_collection_test.py
@@ -6,6 +6,7 @@
 import numpy as np
 import netCDF4 as nc
 import requests
+import json
 from harmony import BBox, Client, Collection, Request, Environment
 import argparse
 from utils import FileHandler
@@ -142,7 +143,10 @@ def verify_variables(merged_group, origin_group, subset_index, both_merged):
             unittest.TestCase().assertTrue(np.array_equal(merged_data, origin_data, equal_nan=True))
 
 
-def verify_groups(merged_group, origin_group, subset_index, both_merged=False):
+def verify_groups(merged_group, origin_group, subset_index, file=None, both_merged=False):
+    if file:
+        print("verifying groups ....." + file)
+
     verify_dims(merged_group, origin_group, both_merged)
     verify_attrs(merged_group, origin_group, both_merged)
     verify_variables(merged_group, origin_group, subset_index, both_merged)
@@ -150,7 +154,7 @@ def verify_groups(merged_group, origin_group, subset_index, both_merged=False):
     for child_group in origin_group.groups:
         merged_subgroup = merged_group[child_group]
         origin_subgroup = origin_group[child_group]
-        verify_groups(merged_subgroup, origin_subgroup, subset_index, both_merged)
+        verify_groups(merged_subgroup, origin_subgroup, subset_index, both_merged=both_merged)
 
 
 # GET TOKEN FROM CMR
@@ -173,7 +177,7 @@ def download_file(url, local_path, headers):
         with open(local_path, 'wb') as file:
             for chunk in response.iter_content(chunk_size=8192):
                 file.write(chunk)
-        print("Original File downloaded successfully.")
+        print("Original File downloaded successfully. " + local_path)
     else:
         print(f"Failed to download the file. Status code: {response.status_code}")
 
@@ -233,34 +237,16 @@ def test(collection_id, venue):
     }
 
     original_files = merge_dataset.variables['subset_files']
+    history_json = json.loads(merge_dataset.history_json)
 
     assert len(original_files) == max_results
-    for file in original_files:
-
-        # if the file name end in an alphabet so we know there is some extension
-        if file[-1].isalpha():
-            file_name = file.rsplit(".", 1)[0]
-        else:
-            file_name = file
-
-        print(file_name)
-        cmr_query = f"{cmr_base_url}{file_name}&collection_concept_id={collection_id}"
-        print(cmr_query)
-
-        response = requests.get(cmr_query, headers=headers)
-
-        result = response.json()
-        links = result.get('items')[0].get('umm').get('RelatedUrls')
-        for link in links:
-            if link.get('Type') == 'GET DATA':
-                data_url = link.get('URL')
-                parsed_url = urlparse(data_url)
-                local_file_name = os.path.basename(parsed_url.path)
-                download_file(data_url, local_file_name, headers)
+    for url in history_json[0].get("derived_from"):
+        local_file_name = os.path.basename(url)
+        download_file(url, local_file_name, headers)
 
     for i, file in enumerate(original_files):
         origin_dataset = nc.Dataset(file)
-        verify_groups(merge_dataset, origin_dataset, i)
+        verify_groups(merge_dataset, origin_dataset, i, file=file)
 
 
 def run():

From 4703d29ccc538dfeb52ce0f18961e9863fd66bad Mon Sep 17 00:00:00 2001
From: Simon Liu
Date: Mon, 6 Nov 2023 10:20:52 -0800
Subject: [PATCH 2/3] update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5c880a25..7d387a08 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Update notebook test to use python code directly instead of using jupyter notebook
 - Updated python libraries
 - Update history json to have url in history
+- Update add collection test to use url in json history
 ### Deprecated
 ### Removed
 ### Fixed

From 5a55c97c3139f28a2e5e914efa3c3d8a0d7467e8 Mon Sep 17 00:00:00 2001
From: Simon Liu
Date: Mon, 6 Nov 2023 14:37:25 -0800
Subject: [PATCH 3/3] update test to test for nan

---
 add_collection_test.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/add_collection_test.py b/add_collection_test.py
index d0a029bf..a3bc1db4 100644
--- a/add_collection_test.py
+++ b/add_collection_test.py
@@ -136,11 +136,15 @@ def verify_variables(merged_group, origin_group, subset_index, both_merged):
             merged_data = np.resize(merged_var[subset_index], origin_var.shape)
             origin_data = origin_var
 
+        equal_nan = True
+        if merged_data.dtype.kind == 'S':
+            equal_nan = False
+
         # verify variable data
         if isinstance(origin_data, str):
             unittest.TestCase().assertEqual(merged_data, origin_data)
         else:
-            unittest.TestCase().assertTrue(np.array_equal(merged_data, origin_data, equal_nan=True))
+            unittest.TestCase().assertTrue(np.array_equal(merged_data, origin_data, equal_nan=equal_nan))
 
 
 def verify_groups(merged_group, origin_group, subset_index, file=None, both_merged=False):
@@ -221,6 +225,7 @@ def test(collection_id, venue):
         print('\nDone downloading.')
 
     filename = file_names[0]
+    # Handle time dimension and variables dropping
    merge_dataset = nc.Dataset(filename, 'r')