Skip to content
This repository was archived by the owner on Sep 9, 2024. It is now read-only.

Commit 746b930

Browse files
Features/update asset roles (#73)
* Modified the way assets are catalogued so that the asset type is no longer used as a key to group assets. Instead, an asset role is a piece of metadata for an asset and the filename of the asset is used as the key. * Updated changelog and version number. * We now use the href and not just the filename for the asset key to ensure keys are unique. * Modified how to handle CMR assets that don't have an asset role specified as a metadata key/value pair. In these situations we use the key as the asset role. * Updated changelog. * Fixed description in changelog * added correct client ids for dev, prod * Updated many methods in Collection to avoid conflating the values needing to be stored for the properties type and roles. Also added new property "roles" to Datafile to assist with the previously mentioned improvement. * Fixed bug with to_stac method. * Fixed pluralization with roles key in data_service --------- Co-authored-by: mike-gangl <michael.e.gangl@jpl.nasa.gov>
1 parent 1c06546 commit 746b930

File tree

8 files changed

+104
-49
lines changed

8 files changed

+104
-49
lines changed

CHANGELOG.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88

99

10-
## Unreleased
10+
## Unreleased [0.3.0]
1111
### Added
1212
### Fixed
1313
* fixed an issue with encoding a json deploy request twice [71](https://github.com/unity-sds/unity-py/issues/71)
1414
### Changed
15+
* We now use the asset URI/HREF as the key into the assets, and use the "metadata" and "data" types as asset-roles: [69](https://github.com/unity-sds/unity-py/issues/69)
1516
### Removed
1617
### Security
1718
### Deprecated
1819

1920

20-
--------
2121
## [0.2.2] - 2024-01-03
2222
### Added
2323
* Added project/venue support [58](https://github.com/unity-sds/unity-py/issues/58)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "unity-sds-client"
3-
version = "0.2.2"
3+
version = "0.3.0"
44
description = "Unity-Py is a Python client to simplify interactions with NASA's Unity Platform."
55
authors = ["Anil Natha, Mike Gangl"]
66
readme = "README.md"

tests/test_files/SNDR.SS1330.CHIRP.20160829T2317.m06.g233.L1_AQ.std.v02_48.G.200425130422.json

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,21 @@
2424
}
2525
],
2626
"assets": {
27-
"data": {
27+
"SNDR.SS1330.CHIRP.20160829T2317.m06.g233.L1_AQ.std.v02_48.G.200425130422.nc": {
2828
"href": "/unity/ads/sounder_sips/chirp_test_data/SNDR.SS1330.CHIRP.20160829T2317.m06.g233.L1_AQ.std.v02_48.G.200425130422.nc",
29-
"title": "Main Data File"
29+
"title": "Main Data File",
30+
"description": "",
31+
"roles": [
32+
"data"
33+
]
3034
},
31-
"metadata_stac": {
35+
"SNDR.SS1330.CHIRP.20160829T2317.m06.g233.L1_AQ.std.v02_48.G.200425130422.json": {
3236
"href": "/unity/ads/sounder_sips/chirp_test_data/SNDR.SS1330.CHIRP.20160829T2317.m06.g233.L1_AQ.std.v02_48.G.200425130422.json",
33-
"title": "Metadata STAC File"
37+
"title": "Metadata STAC File",
38+
"description": "",
39+
"roles": [
40+
"metadata"
41+
]
3442
}
3543
},
3644
"bbox": "",

tests/test_unity_stac.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,30 +26,34 @@ def test_read_stac():
2626
# Added 8/10/23 to check the STAC collection information
2727
assert datasets[1].collection_id == 'C2011289787-GES_DISC'
2828

29-
data_files = collection.data_locations()
29+
data_files = collection.data_files()
3030
assert len(data_files) == 6
31-
data_files = collection.data_locations(["data","opendap"])
31+
data_files = collection.data_files(["data","opendap"])
32+
assert len(data_files) == 2
33+
data_files = collection.data_files(["data","opendap","metadata"])
3234
assert len(data_files) == 4
33-
data_files = collection.data_locations(["data","opendap","metadata"])
34-
assert len(data_files) == 6
3535
data_files = collection.data_locations(["data"])
3636
assert len(data_files) == 2
37-
for x in data_files:
37+
data_locations = collection.data_locations(["data"])
38+
for x in data_locations:
3839
assert x in ['https://data.gesdisc.earthdata.nasa.gov/data/CHIRP/SNDR13CHRP1.2/2016/235/SNDR.SS1330.CHIRP.20160822T0005.m06.g001.L1_AQ.std.v02_48.G.200425095850.nc', 'https://data.gesdisc.earthdata.nasa.gov/data/CHIRP/SNDR13CHRP1.2/2016/235/SNDR.SS1330.CHIRP.20160822T0011.m06.g002.L1_AQ.std.v02_48.G.200425095901.nc']
3940

40-
#Try a "classic" catalog + item files stac catalog
41+
#Try a "classic" catalog + item files stac catalog
4142
collection = Collection.from_stac("tests/test_files/catalog_01.json")
4243
datasets = collection.datasets
4344
# Added 8/10/23 to check the STAC collection information
4445
assert datasets[0].collection_id == 'collection_test'
4546
assert len(datasets) == 1
46-
data_files = collection.data_locations()
47+
data_files = collection.data_files()
4748
assert len(data_files) == 2
48-
data_files = collection.data_locations(["data"])
49+
data_files = collection.data_files(["data", "metadata"])
50+
assert len(data_files) == 2
51+
data_files = collection.data_files(["data"])
4952
assert len(data_files) == 1
50-
data_files = collection.data_locations(["metadata_stac"])
53+
assert data_files[0].roles == ["data"]
54+
data_files = collection.data_files(["metadata"])
5155
assert len(data_files) == 1
52-
assert data_files[0] == "/unity/ads/sounder_sips/chirp_test_data/SNDR.SS1330.CHIRP.20160829T2317.m06.g233.L1_AQ.std.v02_48.G.200425130422.json"
56+
assert data_files[0].roles == ["metadata"]
5357

5458

5559
def test_write_stac():
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
[DEV]
2-
client_id = 2phr7qsbf1k3i9l38288n35ale
2+
client_id = 40c2s0ulbhp9i0fmaph3su9jch
33
auth_endpoint = https://cognito-idp.us-west-2.amazonaws.com
4-
unity_href = d3vc8w9zcq658.cloudfront.net
4+
unity_href = https://d3vc8w9zcq658.cloudfront.net/
55

66
[TEST]
77
client_id = 71894molftjtie4dvvkbjeard0
88
auth_endpoint = https://cognito-idp.us-west-2.amazonaws.com
99
unity_href = https://dxebrgu0bc9w7.cloudfront.net/
1010

1111
[PROD]
12-
client_id =
12+
client_id = 7vehllplbone6p4usqgutqun35
1313
auth_endpoint = https://cognito-idp.us-west-2.amazonaws.com
1414
unity_href = https://d2zjsabg0fonik.cloudfront.net/
1515

@@ -20,4 +20,4 @@ unity_href = https://d2zjsabg0fonik.cloudfront.net/
2020
;dapa_endpoint = https://58nbcawrvb.execute-api.us-west-2.amazonaws.com/test/
2121
;
2222
;[SPS]
23-
;sps_endpoint = http://a22b9d7b66df24e6fb3326ecd4cb0614-676486270.us-west-2.elb.amazonaws.com:5001/
23+
;sps_endpoint = http://a22b9d7b66df24e6fb3326ecd4cb0614-676486270.us-west-2.elb.amazonaws.com:5001/

unity_sds_client/resources/collection.py

Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -42,24 +42,42 @@ def datasets(self):
4242
List of dataset objects
4343
"""
4444
return self._datasets
45+
46+
def data_files(self, roles=[]):
47+
"""
48+
A method to list all assets (data, metadata, etc)
49+
Parameters
50+
----------
51+
roles : List of Strings
52+
List of "stac asset roles" to filter on. commonly ["data"] is of most importance
4553
46-
def data_locations(self, type=[]):
54+
Returns
55+
-------
56+
files
57+
List of returned datafiles
58+
"""
59+
if len(roles) == 0:
60+
return [file for files in [x.datafiles for x in self._datasets] for file in files]
61+
else:
62+
return [file for files in [x.datafiles for x in self._datasets] for file in files if set(file.roles).intersection(set(roles))]
63+
64+
def data_locations(self, roles=[]):
4765
"""
4866
A method to list all asset locations (data, metadata, etc)
4967
Parameters
5068
----------
5169
roles : List of Strings
52-
List of "stac asset keys" to filter on. commonly ["data"] is of most importance
70+
List of "stac asset roles" to filter on. commonly ["data"] is of most importance
5371
5472
Returns
5573
-------
5674
locations
5775
List of returned asset locations
5876
"""
59-
if len(type) == 0:
77+
if len(roles) == 0:
6078
return [file.location for files in [x.datafiles for x in self._datasets] for file in files]
6179
else:
62-
return [file.location for files in [x.datafiles for x in self._datasets] for file in files if file.type in type ]
80+
return [file.location for files in [x.datafiles for x in self._datasets] for file in files if set(file.roles).intersection(set(roles))]
6381

6482
def is_uri(path):
6583
if(path.startswith(tuple(["http:","https:","s3:"]))):
@@ -85,32 +103,38 @@ def to_stac(collection, data_dir):
85103
for dataset in collection._datasets:
86104
updated = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
87105
item = Item(
88-
id=dataset.id,
89-
geometry=dataset.geometry,
90-
bbox=dataset.bbox,
91-
collection=dataset.collection_id,
92-
datetime = date_parser.parse(dataset.data_begin_time),
93-
properties={
94-
"datetime": dataset.data_begin_time,
95-
"start_datetime": dataset.data_begin_time,
96-
"end_datetime":dataset.data_end_time,
97-
"created": dataset.data_create_time if dataset.data_create_time!= None else updated,
98-
"updated": updated
99-
},
100-
106+
id=dataset.id,
107+
geometry=dataset.geometry,
108+
bbox=dataset.bbox,
109+
collection=dataset.collection_id,
110+
datetime = date_parser.parse(dataset.data_begin_time),
111+
properties={
112+
"datetime": dataset.data_begin_time,
113+
"start_datetime": dataset.data_begin_time,
114+
"end_datetime":dataset.data_end_time,
115+
"created": dataset.data_create_time if dataset.data_create_time!= None else updated,
116+
"updated": updated
117+
},
101118
)
102119
item.properties.update(dataset.properties)
103120
catalog.add_item(item)
104121

105122
for df in dataset.datafiles:
123+
106124
if(Collection.is_uri(df.location)):
107125
item_location = df.location
108126
else:
109127
item_location = df.location.replace(data_dir,".")
128+
110129
item.add_asset(
111-
# key="data", asset=pystac.Asset(href=f,title="Main Data File", media_type=pystac.MediaType.HDF5)
112-
key=df.type, asset=Asset(href=item_location,title="{} file".format(df.type))
130+
key = item_location,
131+
asset = Asset(
132+
href = item_location,
133+
title = "{} file".format(df.type),
134+
description = "",
135+
roles = [df.roles]
113136
)
137+
)
114138

115139
from pystac.layout import TemplateLayoutStrategy
116140
write_dir = data_dir
@@ -182,13 +206,22 @@ def from_stac(stac_file):
182206
ds.properties.update(item.properties)
183207

184208
for asset_key in item.assets:
185-
asset = item.assets[asset_key]
209+
210+
asset:Asset = item.assets[asset_key]
211+
asset_type = asset.media_type if asset.media_type else ''
212+
asset_roles = asset.roles if asset.roles is not None else []
213+
asset_title = asset.title if asset.title is not None else ''
214+
asset_description = asset.description if asset.description is not None else ''
215+
216+
if len(asset_roles) == 0 and asset_key in ["data", "metadata"]:
217+
asset_roles = [asset_key]
218+
186219
if(Collection.is_uri(asset.href)):
187-
ds.add_data_file(DataFile(asset_key ,asset.href))
220+
ds.add_data_file(DataFile(asset_type, asset.href, roles=asset_roles, title=asset_title, description=asset_description))
188221
elif(os.path.isabs(asset.href)):
189-
ds.add_data_file(DataFile(asset_key ,asset.href))
222+
ds.add_data_file(DataFile(asset_type, asset.href, roles=asset_roles, title=asset_title, description=asset_description))
190223
else:
191-
ds.add_data_file(DataFile(asset_key ,os.path.join(stac_dir, asset.href)))
224+
ds.add_data_file(DataFile(asset_type, os.path.join(stac_dir, asset.href), roles=asset_roles, title=asset_title, description=asset_description))
192225

193226
collection._datasets.append(ds)
194227
return collection

unity_sds_client/resources/data_file.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ def __str__(self):
88
def __repr__(self):
99
return self.__str__()
1010

11-
def __init__(self, type, location ):
12-
self.type = type
11+
def __init__(self, type, location, roles = [], title = "", description = "" ):
12+
self.description = description
1313
self.location = location
14+
self.roles = roles
15+
self.title = title
16+
self.type = type

unity_sds_client/services/data_service.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,18 @@ def get_collection_data(self, collection: type= Collection):
6363
token = self._session.get_auth().get_token()
6464
response = requests.get(url, headers={"Authorization": "Bearer " + token})
6565
results = response.json()['features']
66-
66+
6767
for dataset in results:
6868
ds = Dataset(dataset['id'], collection.collection_id, dataset['properties']['start_datetime'], dataset['properties']['end_datetime'], dataset['properties']['created'])
69-
ds.add_data_file(DataFile("data" ,dataset['assets']['data']['href']))
70-
ds.add_data_file(DataFile("metadata" ,dataset['assets']['metadata__data']['href']))
69+
70+
for asset_key in dataset['assets']:
71+
location = dataset['assets'][asset_key]['href']
72+
file_type = dataset['assets'][asset_key].get('type', "")
73+
title = dataset['assets'][asset_key].get('title', "")
74+
description = dataset['assets'][asset_key].get('description', "")
75+
roles = dataset['assets'][asset_key]["roles"] if "roles" in dataset['assets'][asset_key] else ["metadata"] if asset_key in ['metadata__cmr','metadata__data'] else [asset_key]
76+
ds.add_data_file(DataFile(file_type, location, roles=roles, title=title, description=description))
77+
7178
datasets.append(ds)
7279

7380
return datasets

0 commit comments

Comments
 (0)