Skip to content

Commit

Permalink
fix(ingest): profiling - memory usage reduction (datahub-project#5830)
Browse files (browse the repository at this point in the history)
  • Loading branch information
shirshanka committed Sep 8, 2022
1 parent f6e3cf3 commit ad3b7b4
Showing 1 changed file with 5 additions and 3 deletions.
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
+import collections
import concurrent.futures
import contextlib
import dataclasses
Expand Down Expand Up @@ -747,7 +748,7 @@ def generate_profiles(
"great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyDataset._get_column_quantiles_bigquery",
_get_column_quantiles_bigquery_patch,
):
-async_profiles = [
+async_profiles = collections.deque(
async_executor.submit(
self._generate_profile_from_request,
query_combiner,
Expand All @@ -756,12 +757,13 @@ def generate_profiles(
profiler_args=profiler_args,
)
for request in requests
-]
+)

# Avoid using as_completed so that the results are yielded in the
# same order as the requests.
# for async_profile in concurrent.futures.as_completed(async_profiles):
-for async_profile in async_profiles:
+while len(async_profiles) > 0:
+async_profile = async_profiles.popleft()
yield async_profile.result()

total_time_taken = timer.elapsed_seconds()
Expand Down

0 comments on commit ad3b7b4

Please sign in to comment.