
Feature/adjust time function #272

Merged · 14 commits · Jun 25, 2024
14 changes: 13 additions & 1 deletion .github/workflows/build-pipeline.yml
@@ -84,7 +84,7 @@ jobs:
        poetry run flake8 podaac
      - name: Test and coverage
        run: |
-          poetry run pytest --junitxml=build/reports/pytest.xml --cov=podaac/ --cov-report=xml:build/reports/coverage.xml -m "not aws and not integration" tests/
+          poetry run pytest -n auto --junitxml=build/reports/pytest.xml --cov=podaac/ --cov-report=xml:build/reports/coverage.xml -m "not aws and not integration" tests/
      - name: SonarCloud Scan
        id: sonarcloud
        uses: sonarsource/sonarcloud-github-action@master
@@ -295,3 +295,15 @@ jobs:
          git config user.email "${GITHUB_ACTOR}@users.noreply.github.com"
          git tag -a "${{ env.software_version }}" -m "Version ${{ env.software_version }}"
          git push origin "${{ env.software_version }}"
+
+      - name: Deploy Harmony
+        env:
+          ENV: ${{ env.venue }}
+          CMR_USER: ${{ secrets.CMR_USER }}
+          CMR_PASS: ${{ secrets.CMR_PASS }}
+        if: |
+          github.ref == 'refs/heads/main' ||
+          startsWith(github.ref, 'refs/heads/release')
+        working-directory: deployment
+        run:
+          poetry run python harmony_deploy.py --tag ${{ env.software_version }}
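Note: the test step now passes `-n auto`, which runs the suite in parallel across available CPU cores via the pytest-xdist plugin (assumed here to be declared in the project's dev dependencies). The new Deploy Harmony step fires only on `main` and `release*` branches and delegates the actual tag update to the new `deployment/harmony_deploy.py` script shown below.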
67 changes: 67 additions & 0 deletions deployment/harmony_deploy.py
@@ -0,0 +1,67 @@
import os
import requests
import json
import logging
import argparse
from requests.auth import HTTPBasicAuth

# Environment variables
ENV = os.getenv('ENV')
CMR_USER = os.getenv('CMR_USER')
CMR_PASS = os.getenv('CMR_PASS')


def bearer_token() -> str:
    tokens = []
    headers = {'Accept': 'application/json'}
    url = f"https://{'uat.' if ENV == 'uat' else ''}urs.earthdata.nasa.gov/api/users"

    # First just try to get a token that already exists
    try:
        resp = requests.get(url + "/tokens", headers=headers, auth=HTTPBasicAuth(CMR_USER, CMR_PASS))
        response_content = json.loads(resp.content)

        for x in response_content:
            tokens.append(x['access_token'])

    except Exception:  # noqa E722
        logging.warning("Error getting the token - check user name and password", exc_info=True)

    # No tokens exist, try to create one
    if not tokens:
        try:
            resp = requests.post(url + "/token", headers=headers, auth=HTTPBasicAuth(CMR_USER, CMR_PASS))
            response_content = json.loads(resp.content)
            tokens.append(response_content['access_token'])
        except Exception:  # noqa E722
            logging.warning("Error getting the token - check user name and password", exc_info=True)

    # If still no token, then we can't do anything
    if not tokens:
        raise RuntimeError("Unable to get bearer token from EDL")

    return next(iter(tokens))


if __name__ == "__main__":

    parser = argparse.ArgumentParser(description="Update the service image tag.")
    parser.add_argument("--tag", help="The new tag version to update.", required=True)
    args = parser.parse_args()

    url = f"https://harmony.{'uat.' if ENV == 'uat' else ''}earthdata.nasa.gov/service-image-tag/podaac-l2-subsetter"
    token = bearer_token()

    headers = {
        "Authorization": f"Bearer {token}",
        "Content-type": "application/json"
    }
    data = {
        "tag": args.tag
    }

    response = requests.put(url, headers=headers, json=data)

    print(response.status_code)
    try:
        print(response.json())
    except json.JSONDecodeError:
        print("Response content is not in JSON format")
4 changes: 2 additions & 2 deletions podaac/subsetter/dimension_cleanup.py
@@ -117,8 +117,8 @@ def recreate_pixcore_dimensions(datasets: list):
    dim_dict = {}
    count = 0
    for dataset in datasets:
-        dim_list_shape = list(dataset.dims.values())
-        current_dims = list(dataset.dims.keys())
+        dim_list_shape = list(dataset.sizes.values())
+        current_dims = list(dataset.sizes.keys())
        rename_list = []
        for current_dim, dim_value in zip(current_dims, dim_list_shape):
            if current_dim not in dim_dict:
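The `dims` → `sizes` changes in this file and the two below follow xarray's deprecation of mapping-style access on `Dataset.dims` (it is slated to return a set of dimension names); `Dataset.sizes` is the supported name-to-length mapping. A minimal sketch of the difference, assuming a recent xarray:

```python
import numpy as np
import xarray as xr

# Throwaway dataset with two dimensions.
ds = xr.Dataset({"sst": (("time", "x"), np.zeros((4, 3)))})

print(dict(ds.sizes))  # {'time': 4, 'x': 3} - mapping of dimension name to length
print(list(ds.dims))   # ['time', 'x'] - names only; mapping-style use is deprecated
```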
5 changes: 2 additions & 3 deletions podaac/subsetter/group_handling.py
@@ -119,12 +119,11 @@ def recombine_grouped_datasets(datasets: List[xr.Dataset], output_file: str, sta
        for group in groups:
            base_dataset.createGroup(group)

-        for dim_name in list(dataset.dims.keys()):
+        for dim_name in list(dataset.sizes.keys()):
            new_dim_name = dim_name.split(GROUP_DELIM)[-1]
            dim_group = _get_nested_group(base_dataset, dim_name)
            if new_dim_name not in dim_group.dimensions:
-                dim_group.createDimension(new_dim_name, dataset.dims[dim_name])
-
+                dim_group.createDimension(new_dim_name, dataset.sizes[dim_name])
        # Rename variables
        _rename_variables(dataset, base_dataset, start_date, time_vars)

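For context on the netCDF4 side of this change: `recombine_grouped_datasets` recreates each flattened xarray dimension inside the proper nested group of the output file. A toy sketch of the underlying netCDF4-python calls (the file name and the group/dimension names here are made up for illustration):

```python
import netCDF4 as nc

# In-memory file: create a nested group and give it a dimension,
# mirroring what recombine_grouped_datasets does per flattened name.
base = nc.Dataset("toy.nc", "w", diskless=True)
grp = base.createGroup("/data_01/ku")  # nested groups created in one call
if "time" not in grp.dimensions:
    grp.createDimension("time", 10)
print(grp.dimensions["time"].size)     # 10
base.close()
```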
16 changes: 13 additions & 3 deletions podaac/subsetter/subset.py
@@ -23,6 +23,7 @@
import json
import operator
import os
+import re
from itertools import zip_longest
from typing import List, Optional, Tuple, Union
import dateutil
@@ -270,11 +271,11 @@ def calculate_chunks(dataset: xr.Dataset) -> dict:
"""
if len(dataset.dims) <= 3:
chunk = {dim: 4000 for dim in dataset.dims
if dataset.dims[dim] > 4000
if dataset.sizes[dim] > 4000
and len(dataset.dims) > 1}
else:
chunk = {dim: 500 for dim in dataset.dims
if dataset.dims[dim] > 500}
if dataset.sizes[dim] > 500}

return chunk

@@ -528,7 +529,7 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable, total_
        return time_vars[0]

    # Filter variables with 'time' in the name to avoid extra work
-    time_vars = list(filter(lambda var_name: 'time' in var_name, dataset.dims.keys()))
+    time_vars = list(filter(lambda var_name: 'time' in var_name, dataset.sizes.keys()))

    for var_name in time_vars:
        if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
@@ -542,6 +543,15 @@
        if var_name not in total_time_vars and ('time' == var_name_time.lower() or 'timeMidScan' == var_name_time) and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims:
            return var_name

+    time_units_pattern = re.compile(r"(days|d|hours|hr|h|minutes|min|m|seconds|sec|s) since \d{4}-\d{2}-\d{2}( \d{2}:\d{2}:\d{2})?")
+    # Check variables for common time variable indicators
+    for var_name, var in dataset.variables.items():
+        # pylint: disable=too-many-boolean-expressions
+        if ((('standard_name' in var.attrs and var.attrs['standard_name'] == 'time') or
+                ('axis' in var.attrs and var.attrs['axis'] == 'T') or
+                ('units' in var.attrs and time_units_pattern.match(var.attrs['units'])))) and var_name not in total_time_vars:
+            return var_name

    # then check if any variables have 'time' in the string if the above loop doesn't return anything
    for var_name in list(dataset.data_vars.keys()):
        var_name_time = var_name.strip(GROUP_DELIM).split(GROUP_DELIM)[-1]
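The units-based fallback added to `compute_time_variable_name` recognizes CF-style `units` attributes. A standalone check of what `time_units_pattern` does and does not match (same regex as in the diff above):

```python
import re

time_units_pattern = re.compile(
    r"(days|d|hours|hr|h|minutes|min|m|seconds|sec|s) since \d{4}-\d{2}-\d{2}( \d{2}:\d{2}:\d{2})?")

for units in ("seconds since 2000-01-01 00:00:00",  # CF datetime with time of day
              "days since 1990-01-01",               # date-only form
              "degrees_north"):                      # latitude units, not time
    print(f"{units!r} -> {bool(time_units_pattern.match(units))}")
# 'seconds since 2000-01-01 00:00:00' -> True
# 'days since 1990-01-01' -> True
# 'degrees_north' -> False
```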