Skip to content

Commit 9f5b5cf

Browse files
authored
fix: get granules pagination (#502)
* breaking: using latest uds-lib + update docker * feat: use latest uds-lib * fix: (in progress) redo granules search * fix: pagination is working now. But total size is wrong (no solution) * fix: adding back total count as there will be only 1 doc * fix: sending real result size * fix: wrong logic to get pagination * fix: unsure where the error is * fix: update page size bug * fix: total size test case
1 parent 70ff291 commit 9f5b5cf

File tree

5 files changed

+54
-14
lines changed

5 files changed

+54
-14
lines changed

cumulus_lambda_functions/lib/uds_db/granules_db_index.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -303,18 +303,21 @@ def add_entry(self, tenant: str, tenant_venue: str, json_body: dict, doc_id: str
303303

304304
def dsl_search(self, tenant: str, tenant_venue: str, search_dsl: dict):
305305
read_alias_name = f'{DBConstants.granules_read_alias_prefix}_{tenant}_{tenant_venue}'.lower().strip()
306-
if 'sort' not in search_dsl:
307-
search_result = self.__es.query(search_dsl,
308-
querying_index=read_alias_name) if 'sort' in search_dsl else self.__es.query(
309-
search_dsl, querying_index=read_alias_name)
306+
if 'sort' not in search_dsl: # We cannot paginate w/o sort. So, max is 10k items:
307+
# This also assumes "size" should be part of search_dsl
308+
search_result = self.__es.query(search_dsl, querying_index=read_alias_name)
310309
LOGGER.debug(f'search_finished: {len(search_result["hits"]["hits"])}')
311310
return search_result
311+
# we can run paginate search
312312
original_size = search_dsl['size'] if 'size' in search_dsl else 20
313+
total_size = -999
313314
result = []
314315
duplicates = set([])
315316
while len(result) < original_size:
316317
search_dsl['size'] = (original_size - len(result)) * 2
317-
search_result = self.__es.query_pages(search_dsl, querying_index=read_alias_name) if 'sort' in search_dsl else self.__es.query(search_dsl, querying_index=read_alias_name)
318+
search_result = self.__es.query_pages(search_dsl, querying_index=read_alias_name)
319+
if total_size == -999:
320+
total_size = self.__es.get_result_size(search_result)
318321
if len(search_result['hits']['hits']) < 1:
319322
break
320323
for each in search_result['hits']['hits']:
@@ -324,10 +327,12 @@ def dsl_search(self, tenant: str, tenant_venue: str, search_dsl: dict):
324327
search_dsl['search_after'] = search_result['hits']['hits'][-1]['sort']
325328

326329
LOGGER.debug(f'search_finished: {len(result)}')
330+
if len(result) > original_size:
331+
result = result[:original_size]
327332
return {
328333
'hits': {
329334
"total": {
330-
"value": len(result)
335+
"value": total_size,
331336
},
332337
'hits': result
333338
}

cumulus_lambda_functions/uds_api/dapa/granules_dapa_query_es.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def __generate_es_dsl(self):
3939
if self.__filter_input is not None:
4040
query_terms.append(CqlParser('properties').transform(self.__filter_input))
4141
query_dsl = {
42-
'track_total_hits': True,
42+
'track_total_hits': self.__offset is None,
4343
'size': self.__limit,
4444
# "collapse": {"field": "id"},
4545
'sort': [
@@ -228,11 +228,11 @@ def start(self):
228228
each_granules_query_result_stripped['links'].append(self_link)
229229
self.__restructure_each_granule_result(each_granules_query_result_stripped)
230230

231-
pagination_link = '' if len(granules_query_result['hits']['hits']) < self.__limit else ','.join([k if isinstance(k, str) else str(k) for k in granules_query_result['hits']['hits'][-1]['sort']])
231+
pagination_link = '' if len(granules_query_result['hits']['hits']) < 1 else ','.join([k if isinstance(k, str) else str(k) for k in granules_query_result['hits']['hits'][-1]['sort']])
232232
return {
233233
'statusCode': 200,
234234
'body': {
235-
'numberMatched': {'total_size': result_size},
235+
'numberMatched': {'total_size': -1 if self.__offset is not None else result_size},
236236
'numberReturned': len(granules_query_result['hits']['hits']),
237237
'stac_version': '1.0.0',
238238
'type': 'FeatureCollection', # TODO correct name?

tests/cumulus_lambda_functions/lib/uds_db/test_granules_db_index.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def test_01(self):
3737
def test_02(self):
3838
os.environ['ES_URL'] = 'https://vpc-uds-sbx-cumulus-es-qk73x5h47jwmela5nbwjte4yzq.us-west-2.es.amazonaws.com'
3939
os.environ['ES_PORT'] = '9200'
40-
self.tenant = 'UDS_MY_LOCAL_ARCHIVE_TEST' # 'uds_local_test' # 'uds_sandbox'
40+
self.tenant = 'UDS_LOCAL_TEST_3' # 'uds_local_test' # 'uds_sandbox'
4141
self.tenant_venue = 'DEV' # 'DEV1' # 'dev'
4242
search_dsl = {
4343
'track_total_hits': True,
@@ -48,7 +48,7 @@ def test_02(self):
4848
],
4949
'query': {
5050
'bool': {
51-
'must': {'term': {'collection': {'value': f'URN:NASA:UNITY:{self.tenant}:{self.tenant_venue}:UDS_UNIT_COLLECTION___2408290522'}}}
51+
'must': {'term': {'collection': {'value': f'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001'}}}
5252
}
5353
},
5454
}

tests/cumulus_lambda_functions/uds_api/dapa/test_granules_dapa_query_es.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,25 @@ def setUp(self) -> None:
2222
self.collection_version = '001'
2323
return
2424

25+
def test_start_01(self):
26+
os.environ['ES_URL'] = 'vpc-uds-sbx-cumulus-es-qk73x5h47jwmela5nbwjte4yzq.us-west-2.es.amazonaws.com'
27+
os.environ['ES_PORT'] = '9200'
28+
self.tenant = 'UDS_LOCAL_TEST_3' # 'uds_local_test' # 'uds_sandbox'
29+
self.tenant_venue = 'DEV' # 'DEV1' # 'dev'
30+
self.collection_name = 'DDD-01' # 'uds_collection' # 'sbx_collection'
31+
self.collection_version = '001'
32+
33+
collection_id = f'URN:NASA:UNITY:{self.tenant}:{self.tenant_venue}:{self.collection_name}___{self.collection_version}'
34+
granule_id = f'{collection_id}:test_file09'
35+
36+
granules_dapa_query = GranulesDapaQueryEs(collection_id, 10, '1736291597733,URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file10', None, None, None, f'localhost/api-prefix')
37+
granules_result = granules_dapa_query.start()
38+
print(granules_result)
39+
print([k['id'] for k in granules_result['body']['features']])
40+
# ['URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file20', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file19', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file14', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file17', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file18', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file12', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file13', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file15', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file06', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file01']
41+
# ['URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file05', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file03', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file09', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file16', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file11', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file04', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file08', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file02', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file07', 'URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001:test_file10']
42+
return
43+
2544
def test_get_single_granule_01(self):
2645
os.environ['ES_URL'] = 'vpc-uds-sbx-cumulus-es-qk73x5h47jwmela5nbwjte4yzq.us-west-2.es.amazonaws.com'
2746
os.environ['ES_PORT'] = '9200'

tests/integration_tests/test_uds_api.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,8 @@ def test_collections_get_single_granule(self):
184184
return
185185

186186
def test_granules_get(self):
187-
# post_url = f'{self.uds_url}collections/urn:nasa:unity:unity:dev:SBG-L2A_RFL___1/items/' # MCP Dev
188-
post_url = f'{self.uds_url}collections/urn:nasa:unity:asips:int:P1590011-T___1/items/' # MCP OPS
187+
post_url = f'{self.uds_url}collections/URN:NASA:UNITY:UDS_LOCAL_TEST_3:DEV:DDD-01___001/items/?limit=10' # MCP Dev
188+
# post_url = f'{self.uds_url}collections/urn:nasa:unity:asips:int:P1590011-T___1/items/?limit=10' # MCP OPS
189189
headers = {
190190
'Authorization': f'Bearer {self.bearer_token}',
191191
}
@@ -194,11 +194,27 @@ def test_granules_get(self):
194194
headers=headers,
195195
)
196196
response_json = json.loads(query_result.text)
197-
print(json.dumps(response_json, indent=4))
197+
# print(json.dumps(response_json, indent=4))
198+
print(f"length: {len(response_json['features'])}")
198199
self.assertEqual(query_result.status_code, 200, f'wrong status code. {query_result.text}')
200+
total_size = response_json['numberMatched']['total_size']
201+
current_size = len(response_json['features'])
199202
links = {k['rel']: k['href'] for k in response_json['links'] if k['rel'] != 'root'}
200203
for k, v in links.items():
201204
self.assertTrue(v.startswith(self.uds_url), f'missing stage: {self.stage} in {v} for {k}')
205+
206+
while len(response_json['features']) > 0:
207+
get_next_link = [k['href'] for k in response_json['links'] if k['rel'] == 'next']
208+
get_next_link = get_next_link[0]
209+
print(get_next_link)
210+
query_result = requests.get(url=get_next_link,
211+
headers=headers,
212+
)
213+
response_json = json.loads(query_result.text)
214+
self.assertEqual(query_result.status_code, 200, f'wrong status code. {query_result.text}')
215+
current_size += len(response_json['features'])
216+
print(f"length: {len(response_json['features'])}")
217+
self.assertEqual(total_size, current_size, f'mismatched size')
202218
return
203219

204220
def test_single_granule_get(self):

0 commit comments

Comments
 (0)