Taught import to throttle at 25% of available workers.
When we expose IMPORT_WORKERS_PERCENT, this becomes a configurable percentage.

fixes pulp#4068.
ggainey committed Jul 26, 2023
1 parent ec1b2da commit d2c5f00
Showing 2 changed files with 18 additions and 4 deletions.
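
A quick sketch of the arithmetic behind the throttle (illustration only; import_worker_limit is a hypothetical helper, not part of this commit, and the worker counts are made-up examples). Note that int() floors the product and max(1, ...) guarantees at least one worker even on very small deployments:

    # Mirrors the formula this commit adds to pulpcore/app/tasks/importer.py.
    def import_worker_limit(total_workers: int, percent: int = 25) -> int:
        # Floor of total_workers * percent / 100, but never less than 1.
        return max(1, int(total_workers * (percent / 100.0)))

    assert import_worker_limit(10) == 2     # 25% of 10 workers, floored from 2.5
    assert import_worker_limit(3) == 1      # 0.75 floors to 0, clamped up to 1
    assert import_worker_limit(8, 50) == 4  # e.g. IMPORT_WORKERS_PERCENT=50

With the default of 25%, an installation with 10 online workers never runs more than 2 repo-import tasks at once.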
1 change: 1 addition & 0 deletions CHANGES/4068.bugfix
@@ -0,0 +1 @@
+Taught pulp-import to never use more than 25% of available worker-threads.
21 changes: 17 additions & 4 deletions pulpcore/app/tasks/importer.py
@@ -8,6 +8,7 @@
 from gettext import gettext as _
 from logging import getLogger
 
+from django.conf import settings
 from django.core.files.storage import default_storage
 from django.db.models import F
 from naya.json import stream_array, tokenize
@@ -35,6 +36,7 @@
     RepositoryResource,
 )
 from pulpcore.constants import TASK_STATES
+from pulpcore.tasking.pulpcore_worker import Worker
 from pulpcore.tasking.tasks import dispatch
 
 from pulpcore.plugin.importexport import BaseContentResource
@@ -506,6 +508,15 @@ def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
             default_storage.save(base_path, f)
 
     # Now import repositories, in parallel.
+
+    # We want to limit the number of available-workers that import will consume, so that
+    # pulp can continue to work while doing an import. We accomplish this by creating a
+    # reserved-resource string for each repo-import-task based on that repo's index in
+    # the dispatch loop, mod number-of-workers-to-consume.
+    import_workers_percent = int(settings.get("IMPORT_WORKERS_PERCENT", 25))
+    total_workers = Worker.objects.online_workers().count()
+    import_workers = max(1, int(total_workers * (import_workers_percent / 100.0)))
+
     with open(os.path.join(temp_dir, REPO_FILE), "r") as repo_data_file:
         data = json.load(repo_data_file)
         gpr = GroupProgressReport(
@@ -517,14 +528,16 @@ def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
         )
         gpr.save()
 
-        for src_repo in data:
+        for index, src_repo in enumerate(data):
             # Lock the repo we're importing-into
             dest_repo_name = _get_destination_repo_name(importer, src_repo["name"])
 
+            # pulpcore-worker limiter
+            worker_rsrc = f"import-worker-{index % import_workers}"
             try:
                 dest_repo = Repository.objects.get(name=dest_repo_name)
             except Repository.DoesNotExist:
                 if create_repositories:
-                    exclusive_resources = []
+                    exclusive_resources = [worker_rsrc]
                     dest_repo_pk = ""
                 else:
                     log.warning(
@@ -534,7 +547,7 @@ def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
                     )
                     continue
             else:
-                exclusive_resources = [dest_repo]
+                exclusive_resources = [dest_repo, worker_rsrc]
                 dest_repo_pk = dest_repo.pk
 
             dispatch(
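
The throttle piggybacks on pulpcore's existing exclusive-resource locking rather than introducing a new queue: tasks that share an exclusive-resource string are serialized, so handing each repo-import task one of only import_workers distinct strings caps their concurrency at import_workers. A minimal standalone sketch of that bucketing follows (plain Python, illustration only; no pulpcore tasking is involved, and the repo names are hypothetical):

    from collections import defaultdict

    import_workers = 2  # e.g. 25% of 10 online workers
    repos = [f"repo-{i}" for i in range(7)]  # hypothetical repo list

    # Repos dispatched across import_workers resource strings run in at most
    # import_workers parallel streams, since tasks holding the same string
    # cannot run concurrently.
    buckets = defaultdict(list)
    for index, repo in enumerate(repos):
        worker_rsrc = f"import-worker-{index % import_workers}"  # same string the commit builds
        buckets[worker_rsrc].append(repo)

    for rsrc, members in sorted(buckets.items()):
        print(rsrc, "->", members)  # each bucket serializes behind one lock
    # import-worker-0 -> ['repo-0', 'repo-2', 'repo-4', 'repo-6']
    # import-worker-1 -> ['repo-1', 'repo-3', 'repo-5']

When the destination repository already exists, the task also keeps dest_repo in its exclusive resources, so the per-repo lock and the worker throttle compose.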
