From 8b40f68f95225e658e741fe6c0bb33af59da03ca Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 1 Aug 2024 10:50:35 -0700 Subject: [PATCH 1/2] fix(ingest): set lastObserved in sdk when unset Fixes a bug from https://github.com/datahub-project/datahub/pull/10829 --- metadata-ingestion/src/datahub/cli/cli_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index 21841b173c23d..1b9cccb1cbc21 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -1,5 +1,6 @@ import json import logging +import time import typing from datetime import datetime from typing import Any, Dict, List, Optional, Tuple, Type, Union @@ -403,6 +404,8 @@ def ensure_has_system_metadata( if event.systemMetadata is None: event.systemMetadata = SystemMetadataClass() metadata = event.systemMetadata + if metadata.lastObserved == 0: + metadata.lastObserved = int(time.time() * 1000) if metadata.properties is None: metadata.properties = {} props = metadata.properties From e8452166bbbc08c1c3b7801ebc1a2815fd5d4ba5 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 2 Aug 2024 10:19:40 -0700 Subject: [PATCH 2/2] fix test --- metadata-ingestion/tests/unit/test_rest_sink.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/tests/unit/test_rest_sink.py b/metadata-ingestion/tests/unit/test_rest_sink.py index efa6c6678a8c7..a76f96039c2c7 100644 --- a/metadata-ingestion/tests/unit/test_rest_sink.py +++ b/metadata-ingestion/tests/unit/test_rest_sink.py @@ -1,7 +1,9 @@ import json +from datetime import datetime, timezone import pytest import requests +from freezegun import freeze_time import datahub.metadata.schema_classes as models from datahub.emitter.mcp import MetadataChangeProposalWrapper @@ -9,6 +11,7 @@ MOCK_GMS_ENDPOINT = "http://fakegmshost:8080" +FROZEN_TIME = 1618987484580 basicAuditStamp = models.AuditStampClass( time=1618987484580, actor="urn:li:corpuser:datahub", @@ -76,7 +79,7 @@ } }, "systemMetadata": { - "lastObserved": 0, + "lastObserved": FROZEN_TIME, "lastRunId": "no-run-id-provided", "properties": { "clientId": "acryl-datahub", @@ -134,7 +137,7 @@ } }, "systemMetadata": { - "lastObserved": 0, + "lastObserved": FROZEN_TIME, "lastRunId": "no-run-id-provided", "properties": { "clientId": "acryl-datahub", @@ -178,7 +181,7 @@ } }, "systemMetadata": { - "lastObserved": 0, + "lastObserved": FROZEN_TIME, "lastRunId": "no-run-id-provided", "properties": { "clientId": "acryl-datahub", @@ -263,7 +266,7 @@ "contentType": "application/json", }, "systemMetadata": { - "lastObserved": 0, + "lastObserved": FROZEN_TIME, "lastRunId": "no-run-id-provided", "properties": { "clientId": "acryl-datahub", @@ -276,6 +279,7 @@ ), ], ) +@freeze_time(datetime.fromtimestamp(FROZEN_TIME / 1000, tz=timezone.utc)) def test_datahub_rest_emitter(requests_mock, record, path, snapshot): def match_request_text(request: requests.Request) -> bool: requested_snapshot = request.json()