Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ingest/gc): logging and stopping fix #12266

Merged
merged 6 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,11 @@
break
params["scrollId"] = document["scrollId"]
except Exception as e:
logger.error(
f"ergc({self.instance_id}): failed to fetch next batch of execution requests: {e}"
self.report.failure(

Check warning on line 161 in metadata-ingestion/src/datahub/ingestion/source/gc/execution_request_cleanup.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/gc/execution_request_cleanup.py#L161

Added line #L161 was not covered by tests
title="failed to fetch next batch of execution requests",
message="failed to fetch next batch of execution requests",
anshbansal marked this conversation as resolved.
Show resolved Hide resolved
context=str(self.instance_id),
exc=e,
)
self.report.ergc_read_errors += 1

Expand Down Expand Up @@ -231,8 +234,11 @@
self.graph.delete_entity(entry.urn, True)
except Exception as e:
self.report.ergc_delete_errors += 1
logger.error(
f"ergc({self.instance_id}): failed to delete ExecutionRequest {entry.request_id}: {e}"
self.report.failure(

Check warning on line 237 in metadata-ingestion/src/datahub/ingestion/source/gc/execution_request_cleanup.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/gc/execution_request_cleanup.py#L237

Added line #L237 was not covered by tests
title="failed to delete ExecutionRequest",
message="failed to delete ExecutionRequest",
anshbansal marked this conversation as resolved.
Show resolved Hide resolved
context=str(self.instance_id),
exc=e,
)

def _reached_runtime_limit(self) -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@
f"Dry run is on otherwise it would have deleted {urn} with hard deletion"
)
return
if self._deletion_limit_reached() or self._times_up():
return

Check warning on line 167 in metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py#L166-L167

Added lines #L166 - L167 were not covered by tests
self._increment_removal_started_count()
self.ctx.graph.delete_entity(urn=urn, hard=True)
self.ctx.graph.delete_references_to_urn(
Expand Down Expand Up @@ -203,11 +205,10 @@
for future in done:
self._print_report()
if future.exception():
logger.error(
f"Failed to delete entity {futures[future]}: {future.exception()}"
)
self.report.failure(
f"Failed to delete entity {futures[future]}",
title="Failed to delete entity",
message=futures[future],
anshbansal marked this conversation as resolved.
Show resolved Hide resolved
context=str(futures[future]),
exc=future.exception(),
)
self.report.num_soft_deleted_entity_processed += 1
Expand Down Expand Up @@ -274,6 +275,28 @@
)
yield from self._get_soft_deleted_queries()

def _times_up(self) -> bool:
if (

Check warning on line 279 in metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py#L278-L279

Added lines #L278 - L279 were not covered by tests
self.config.runtime_limit_seconds
and time.time() - self.start_time > self.config.runtime_limit_seconds
):
logger.info(

Check warning on line 283 in metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py#L283

Added line #L283 was not covered by tests
f"Runtime limit of {self.config.runtime_limit_seconds} seconds reached"
)
anshbansal marked this conversation as resolved.
Show resolved Hide resolved
return True
return False

Check warning on line 287 in metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py#L286-L287

Added lines #L286 - L287 were not covered by tests

def _deletion_limit_reached(self) -> bool:
if (

Check warning on line 290 in metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py#L289-L290

Added lines #L289 - L290 were not covered by tests
self.config.limit_entities_delete
and self.report.num_hard_deleted > self.config.limit_entities_delete
):
logger.info(

Check warning on line 294 in metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py#L294

Added line #L294 was not covered by tests
f"Limit of {self.config.limit_entities_delete} entities reached"
)
anshbansal marked this conversation as resolved.
Show resolved Hide resolved
return True
return False

Check warning on line 298 in metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py#L297-L298

Added lines #L297 - L298 were not covered by tests

def cleanup_soft_deleted_entities(self) -> None:
if not self.config.enabled:
return
Expand All @@ -285,24 +308,8 @@
self._print_report()
while len(futures) >= self.config.futures_max_at_time:
futures = self._process_futures(futures)
if (
self.config.limit_entities_delete
and self.report.num_hard_deleted > self.config.limit_entities_delete
):
logger.info(
f"Limit of {self.config.limit_entities_delete} entities reached. Stopped adding more."
)
if self._deletion_limit_reached() or self._times_up():

Check warning on line 311 in metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py#L311

Added line #L311 was not covered by tests
break
if (
self.config.runtime_limit_seconds
and time.time() - self.start_time
> self.config.runtime_limit_seconds
):
logger.info(
f"Runtime limit of {self.config.runtime_limit_seconds} seconds reached. Not submitting more futures."
)
break

future = executor.submit(self.delete_soft_deleted_entity, urn)
futures[future] = urn

Expand Down
Loading