Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: Fix future metadata record date issue. #98

Merged
merged 2 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions api/datalake_api/querier.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import time
import os

from datetime import datetime, timedelta
import decimal
import logging
log = logging.getLogger(__name__)

Expand All @@ -40,6 +42,7 @@
slow and expensive.
'''
DEFAULT_LOOKBACK_DAYS = 14
LATEST_MAX_LOOKFORWARD_HOURS = 24


_ONE_DAY_MS = 24 * 60 * 60 * 1000
Expand Down Expand Up @@ -347,6 +350,9 @@ def _latest_table(self):
return self.dynamodb.Table(self.latest_table_name)

def query_latest(self, what, where, lookback_days=DEFAULT_LOOKBACK_DAYS):
now = datetime.utcnow()
max_lookforward = now + timedelta(hours=LATEST_MAX_LOOKFORWARD_HOURS)

if self.use_latest_table:
log.info('inside use_latest_table=TRUE')
response = self._latest_table.query(
Expand All @@ -359,6 +365,18 @@ def query_latest(self, what, where, lookback_days=DEFAULT_LOOKBACK_DAYS):
return self._default_latest(what, where, lookback_days)

latest_item = items[0]
metadata_start = latest_item['metadata']['start']

if isinstance(metadata_start, (int, decimal.Decimal)):
metadata_start = datetime.utcfromtimestamp(float(metadata_start) / 1000)

elif isinstance(metadata_start, str):
metadata_start = datetime.strptime(metadata_start, '%Y-%m-%dT%H:%M:%S.%fZ')

if metadata_start > max_lookforward:
log.info(f"Record with metadata.start {metadata_start} is beyond MAX_LOOKFORWARD_HOURS. Falling back to default latest.")
return self._default_latest(what, where, lookback_days)

return dict(url=latest_item['url'], metadata=latest_item['metadata'])

else:
Expand Down
16 changes: 16 additions & 0 deletions api/tests/test_archive_querier.py
Original file line number Diff line number Diff line change
Expand Up @@ -598,3 +598,19 @@ def test_query_latest_just_latest_table(table_maker, querier, record_maker):
_validate_latest_result(result, what='meow', where='tree')
else:
assert result is None


def test_query_latest_future_record_exceeds_lookforward(table_maker, querier, record_maker):
    """Check that a record starting 25 hours in the future is not returned.

    A single 'meow'/'tree' record whose start is 25 hours ahead of the
    current time (and whose end is 26 hours ahead) is written to both the
    default table and the latest table. ``query_latest`` is then expected
    to return ``None`` for that what/where pair.
    """
    # Sample the clock once so start and end are exactly one hour apart.
    now_ms = int(time.time() * 1000)
    hour_ms = 60 * 60 * 1000
    future_start = now_ms + 25 * hour_ms
    future_end = now_ms + 26 * hour_ms
    record = record_maker(what='meow', where='tree', start=future_start, end=future_end)

    # Start from empty tables, then store the same future record in both.
    default_table, latest_table = table_maker([])

    default_table.put_item(Item=record[0])
    latest_table.put_item(Item=record[0])

    result = querier.query_latest('meow', 'tree')
    assert result is None, "No result should be returned if falling back to the default query"

Loading