
Feature/adjust time function #272

Merged · 14 commits · Jun 25, 2024
14 changes: 13 additions & 1 deletion .github/workflows/build-pipeline.yml
@@ -84,7 +84,7 @@ jobs:
        poetry run flake8 podaac
      - name: Test and coverage
        run: |
-          poetry run pytest --junitxml=build/reports/pytest.xml --cov=podaac/ --cov-report=xml:build/reports/coverage.xml -m "not aws and not integration" tests/
+          poetry run pytest -n auto --junitxml=build/reports/pytest.xml --cov=podaac/ --cov-report=xml:build/reports/coverage.xml -m "not aws and not integration" tests/
      - name: SonarCloud Scan
        id: sonarcloud
        uses: sonarsource/sonarcloud-github-action@master
@@ -295,3 +295,15 @@ jobs:
          git config user.email "${GITHUB_ACTOR}@users.noreply.github.com"
          git tag -a "${{ env.software_version }}" -m "Version ${{ env.software_version }}"
          git push origin "${{ env.software_version }}"
+
+      - name: Deploy Harmony
+        env:
+          ENV: ${{ env.venue }}
+          CMR_USER: ${{ secrets.CMR_USER }}
+          CMR_PASS: ${{ secrets.CMR_PASS }}
+        if: |
+          github.ref == 'refs/heads/main' ||
+          startsWith(github.ref, 'refs/heads/release')
+        working-directory: deployment
+        run:
+          poetry run python harmony_deploy.py --tag ${{ env.software_version }}
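Note: the test step now passes `-n auto`, which runs the suite in parallel across available CPU cores via the pytest-xdist plugin (assumed here to be declared in the project's dev dependencies). The new Deploy Harmony step fires only on `main` and `release*` branches and delegates the actual tag update to the new `deployment/harmony_deploy.py` script shown below.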
67 changes: 67 additions & 0 deletions deployment/harmony_deploy.py
@@ -0,0 +1,67 @@
import os
import requests
import json
import logging
import argparse
from requests.auth import HTTPBasicAuth

# Environment variables
ENV = os.getenv('ENV')
CMR_USER = os.getenv('CMR_USER')
CMR_PASS = os.getenv('CMR_PASS')


def bearer_token() -> str:
    tokens = []
    headers = {'Accept': 'application/json'}
    url = f"https://{'uat.' if ENV == 'uat' else ''}urs.earthdata.nasa.gov/api/users"

    # First just try to get a token that already exists
    try:
        resp = requests.get(url + "/tokens", headers=headers, auth=HTTPBasicAuth(CMR_USER, CMR_PASS))
        response_content = json.loads(resp.content)

        for x in response_content:
            tokens.append(x['access_token'])

    except Exception:  # noqa E722
        logging.warning("Error getting the token - check user name and password", exc_info=True)

    # No tokens exist, try to create one
    if not tokens:
        try:
            resp = requests.post(url + "/token", headers=headers, auth=HTTPBasicAuth(CMR_USER, CMR_PASS))
            response_content = json.loads(resp.content)
            tokens.append(response_content['access_token'])
        except Exception:  # noqa E722
            logging.warning("Error getting the token - check user name and password", exc_info=True)

    # If still no token, then we can't do anything
    if not tokens:
        raise RuntimeError("Unable to get bearer token from EDL")

    return next(iter(tokens))


if __name__ == "__main__":

    parser = argparse.ArgumentParser(description="Update the service image tag.")
    parser.add_argument("--tag", help="The new tag version to update.", required=True)
    args = parser.parse_args()

    url = f"https://harmony.{'uat.' if ENV == 'uat' else ''}earthdata.nasa.gov/service-image-tag/podaac-l2-subsetter"
    token = bearer_token()

    headers = {
        "Authorization": f"Bearer {token}",
        "Content-type": "application/json"
    }
    data = {
        "tag": args.tag
    }

    response = requests.put(url, headers=headers, json=data)

    print(response.status_code)
    try:
        print(response.json())
    except json.JSONDecodeError:
        print("Response content is not in JSON format")
4 changes: 2 additions & 2 deletions podaac/subsetter/dimension_cleanup.py
@@ -117,8 +117,8 @@ def recreate_pixcore_dimensions(datasets: list):
    dim_dict = {}
    count = 0
    for dataset in datasets:
-        dim_list_shape = list(dataset.dims.values())
-        current_dims = list(dataset.dims.keys())
+        dim_list_shape = list(dataset.sizes.values())
+        current_dims = list(dataset.sizes.keys())
        rename_list = []
        for current_dim, dim_value in zip(current_dims, dim_list_shape):
            if current_dim not in dim_dict:
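The `dims` → `sizes` changes in this file and the two below follow xarray's deprecation of mapping-style access on `Dataset.dims` (it is slated to return a set of dimension names); `Dataset.sizes` is the supported name-to-length mapping. A minimal sketch of the difference, assuming a recent xarray:

```python
import numpy as np
import xarray as xr

# Throwaway dataset with two dimensions.
ds = xr.Dataset({"sst": (("time", "x"), np.zeros((4, 3)))})

print(dict(ds.sizes))  # {'time': 4, 'x': 3} - mapping of dimension name to length
print(list(ds.dims))   # ['time', 'x'] - names only; mapping-style use is deprecated
```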
5 changes: 2 additions & 3 deletions podaac/subsetter/group_handling.py
@@ -119,12 +119,11 @@ def recombine_grouped_datasets(datasets: List[xr.Dataset], output_file: str, sta
        for group in groups:
            base_dataset.createGroup(group)

-        for dim_name in list(dataset.dims.keys()):
+        for dim_name in list(dataset.sizes.keys()):
            new_dim_name = dim_name.split(GROUP_DELIM)[-1]
            dim_group = _get_nested_group(base_dataset, dim_name)
            if new_dim_name not in dim_group.dimensions:
-                dim_group.createDimension(new_dim_name, dataset.dims[dim_name])
-
+                dim_group.createDimension(new_dim_name, dataset.sizes[dim_name])
        # Rename variables
        _rename_variables(dataset, base_dataset, start_date, time_vars)

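For context on the netCDF4 side of this change: `recombine_grouped_datasets` recreates each flattened xarray dimension inside the proper nested group of the output file. A toy sketch of the underlying netCDF4-python calls (the file name and the group/dimension names here are made up for illustration):

```python
import netCDF4 as nc

# In-memory file: create a nested group and give it a dimension,
# mirroring what recombine_grouped_datasets does per flattened name.
base = nc.Dataset("toy.nc", "w", diskless=True)
grp = base.createGroup("/data_01/ku")  # nested groups created in one call
if "time" not in grp.dimensions:
    grp.createDimension("time", 10)
print(grp.dimensions["time"].size)     # 10
base.close()
```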
16 changes: 13 additions & 3 deletions podaac/subsetter/subset.py
@@ -23,6 +23,7 @@
import json
import operator
import os
+import re
from itertools import zip_longest
from typing import List, Optional, Tuple, Union
import dateutil
@@ -270,11 +271,11 @@ def calculate_chunks(dataset: xr.Dataset) -> dict:
"""
if len(dataset.dims) <= 3:
chunk = {dim: 4000 for dim in dataset.dims
if dataset.dims[dim] > 4000
if dataset.sizes[dim] > 4000
and len(dataset.dims) > 1}
else:
chunk = {dim: 500 for dim in dataset.dims
if dataset.dims[dim] > 500}
if dataset.sizes[dim] > 500}

return chunk

@@ -528,7 +529,7 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable, total_
        return time_vars[0]

    # Filter variables with 'time' in the name to avoid extra work
-    time_vars = list(filter(lambda var_name: 'time' in var_name, dataset.dims.keys()))
+    time_vars = list(filter(lambda var_name: 'time' in var_name, dataset.sizes.keys()))

    for var_name in time_vars:
        if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
@@ -542,6 +543,15 @@
        if var_name not in total_time_vars and ('time' == var_name_time.lower() or 'timeMidScan' == var_name_time) and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims:
            return var_name

+    time_units_pattern = re.compile(r"(days|d|hours|hr|h|minutes|min|m|seconds|sec|s) since \d{4}-\d{2}-\d{2}( \d{2}:\d{2}:\d{2})?")
+    # Check variables for common time variable indicators
+    for var_name, var in dataset.variables.items():
+        # pylint: disable=too-many-boolean-expressions
+        if ((('standard_name' in var.attrs and var.attrs['standard_name'] == 'time') or
+                ('axis' in var.attrs and var.attrs['axis'] == 'T') or
+                ('units' in var.attrs and time_units_pattern.match(var.attrs['units'])))) and var_name not in total_time_vars:
+            return var_name

    # then check if any variables have 'time' in the string if the above loop doesn't return anything
    for var_name in list(dataset.data_vars.keys()):
        var_name_time = var_name.strip(GROUP_DELIM).split(GROUP_DELIM)[-1]
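The units-based fallback added to `compute_time_variable_name` recognizes CF-style `units` attributes. A standalone check of what `time_units_pattern` does and does not match (same regex as in the diff above):

```python
import re

time_units_pattern = re.compile(
    r"(days|d|hours|hr|h|minutes|min|m|seconds|sec|s) since \d{4}-\d{2}-\d{2}( \d{2}:\d{2}:\d{2})?")

for units in ("seconds since 2000-01-01 00:00:00",  # CF datetime with time of day
              "days since 1990-01-01",               # date-only form
              "degrees_north"):                      # latitude units, not time
    print(f"{units!r} -> {bool(time_units_pattern.match(units))}")
# 'seconds since 2000-01-01 00:00:00' -> True
# 'days since 1990-01-01' -> True
# 'degrees_north' -> False
```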