From db46b9b3699d546b3536b68cf480038f31b73f91 Mon Sep 17 00:00:00 2001 From: Alex Bednarek Date: Mon, 23 Sep 2024 13:54:58 -0400 Subject: [PATCH 1/2] API: Adding fallback to default latest query if record metadata.start is greater than 24 hrs in future. --- api/datalake_api/querier.py | 18 ++++++++++++++++++ api/tests/test_archive_querier.py | 16 ++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/api/datalake_api/querier.py b/api/datalake_api/querier.py index 4b00c05..fd8a51c 100644 --- a/api/datalake_api/querier.py +++ b/api/datalake_api/querier.py @@ -20,6 +20,8 @@ import time import os +from datetime import datetime, timedelta +import decimal import logging log = logging.getLogger(__name__) @@ -40,6 +42,7 @@ slow and expensive. ''' DEFAULT_LOOKBACK_DAYS = 14 +LATEST_MAX_LOOKFORWARD_HOURS = 24 _ONE_DAY_MS = 24 * 60 * 60 * 1000 @@ -347,6 +350,9 @@ def _latest_table(self): return self.dynamodb.Table(self.latest_table_name) def query_latest(self, what, where, lookback_days=DEFAULT_LOOKBACK_DAYS): + now = datetime.utcnow() + max_lookforward = now + timedelta(hours=LATEST_MAX_LOOKFORWARD_HOURS) + if self.use_latest_table: log.info('inside use_latest_table=TRUE') response = self._latest_table.query( @@ -359,6 +365,18 @@ def query_latest(self, what, where, lookback_days=DEFAULT_LOOKBACK_DAYS): return self._default_latest(what, where, lookback_days) latest_item = items[0] + metadata_start = latest_item['metadata']['start'] + + if isinstance(metadata_start, (int, decimal.Decimal)): + metadata_start = datetime.utcfromtimestamp(float(metadata_start) / 1000) + + elif isinstance(metadata_start, str): + metadata_start = datetime.strptime(metadata_start, '%Y-%m-%dT%H:%M:%S.%fZ') + + if metadata_start > max_lookforward: + log.info(f"Record with metadata.start {metadata_start} is beyond MAX_LOOKFORWARD_HOURS. Falling back to default latest.") + return self._default_latest(what, where, lookback_days) + return dict(url=latest_item['url'], metadata=latest_item['metadata']) else: diff --git a/api/tests/test_archive_querier.py b/api/tests/test_archive_querier.py index 399d1d7..300eb0c 100644 --- a/api/tests/test_archive_querier.py +++ b/api/tests/test_archive_querier.py @@ -598,3 +598,19 @@ def test_query_latest_just_latest_table(table_maker, querier, record_maker): _validate_latest_result(result, what='meow', where='tree') else: assert result is None + + +def test_query_latest_future_record_exceeds_lookforward(table_maker, querier, record_maker): + future_start = (int(time.time() * 1000) + 25 * 60 * 60 * 1000) # 25 hours ahead + future_end = (int(time.time() * 1000) + 26 * 60 * 60 * 1000) # ends one hour later + record = record_maker(what='meow', where='tree', start=future_start, end=future_end) + + default_table, latest_table = table_maker([]) + print(default_table.__dict__, type(default_table)) + + default_table.put_item(Item=record[0]) + latest_table.put_item(Item=record[0]) + + result = querier.query_latest('meow', 'tree') + assert result is None, "No result should be returned if falling back to the default query" + From 1b526b13b69f99cb1724db95dcc5b39a32b93f35 Mon Sep 17 00:00:00 2001 From: Alex Bednarek Date: Mon, 23 Sep 2024 13:59:12 -0400 Subject: [PATCH 2/2] API: Cleanup comments. --- api/tests/test_archive_querier.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/tests/test_archive_querier.py b/api/tests/test_archive_querier.py index 300eb0c..3dc84c9 100644 --- a/api/tests/test_archive_querier.py +++ b/api/tests/test_archive_querier.py @@ -601,8 +601,8 @@ def test_query_latest_just_latest_table(table_maker, querier, record_maker): def test_query_latest_future_record_exceeds_lookforward(table_maker, querier, record_maker): - future_start = (int(time.time() * 1000) + 25 * 60 * 60 * 1000) # 25 hours ahead - future_end = (int(time.time() * 1000) + 26 * 60 * 60 * 1000) # ends one hour later + future_start = (int(time.time() * 1000) + 25 * 60 * 60 * 1000) + future_end = (int(time.time() * 1000) + 26 * 60 * 60 * 1000) record = record_maker(what='meow', where='tree', start=future_start, end=future_end) default_table, latest_table = table_maker([])