From 3ba330b4a8e16070dc1074654f05402c4759e852 Mon Sep 17 00:00:00 2001
From: Stefan Heinemann
Date: Mon, 9 Dec 2024 16:03:00 +0100
Subject: [PATCH 1/2] PB-1091 Add count to size updater, exclude is_external

Make it possible to only update a chunk of assets at once, so the
command will hopefully stop timing out.

Also, exclude the external assets.
---
Review notes (this section is ignored by `git am`):
- Collection-asset progress message: the second string literal was missing
  its f-prefix, so "{total_asset_count}" was printed verbatim. Fixed.
- Collection-asset error log: the format string has two %s placeholders but
  no arguments were passed. `bucket` and `key` are now supplied, matching
  the asset branch. This adds one line, so the hunk counts and diffstat
  below were adjusted accordingly.
- The blob ids on the `index` lines still refer to the original revision.
- Line breaks were reconstructed from a whitespace-collapsed copy of this
  series. Confirm blank-line placement and the wrapping of the `help`
  string context lines against the target file before applying.

 .../commands/update_asset_file_size.py        | 47 ++++++++++++++++---
 1 file changed, 40 insertions(+), 7 deletions(-)

diff --git a/app/stac_api/management/commands/update_asset_file_size.py b/app/stac_api/management/commands/update_asset_file_size.py
index f0e01423..4661fa4d 100644
--- a/app/stac_api/management/commands/update_asset_file_size.py
+++ b/app/stac_api/management/commands/update_asset_file_size.py
@@ -4,6 +4,7 @@
 
 from django.conf import settings
 from django.core.management.base import BaseCommand
+from django.core.management.base import CommandParser
 
 from stac_api.models import Asset
 from stac_api.models import CollectionAsset
@@ -18,38 +19,60 @@
 class Handler(CommandHandler):
 
     def update(self):
-        self.print_success('running command to update file size')
+        self.print_success('Running command to update file size')
+
+        asset_limit = self.options['count']
+
+        asset_qs = Asset.objects.filter(file_size=0, is_external=False)
+        total_asset_count = asset_qs.count()
+        assets = asset_qs.all()[:asset_limit]
+
+        self.print_success(f'Update file size for {len(assets)} assets out of {total_asset_count}')
 
-        self.print_success('update file size for assets')
-        assets = Asset.objects.filter(file_size=0).all()
         for asset in assets:
             selected_bucket = select_s3_bucket(asset.item.collection.name)
             s3 = get_s3_client(selected_bucket)
             bucket = settings.AWS_SETTINGS[selected_bucket.name]['S3_BUCKET_NAME']
             key = SharedAssetUploadBase.get_path(None, asset)
+
             try:
                 file_size = s3.head_object(Bucket=bucket, Key=key)['ContentLength']
                 asset.file_size = file_size
                 asset.save()
+                print(".", end="", flush=True)
             except ClientError:
-                logger.error('file size could not be read from s3 bucket for asset %s', key)
+                logger.error(
+                    'file size could not be read from s3 bucket [%s] for asset %s', bucket, key
+                )
 
+        print()
+
+        collection_asset_qs = CollectionAsset.objects.filter(file_size=0)
+        total_asset_count = collection_asset_qs.count()
+        collection_assets = collection_asset_qs.all()[:asset_limit]
+
+        self.print_success(
+            f"Update file size for {len(collection_assets)} collection assets out of "
+            f"{total_asset_count}"
+        )
-        self.print_success('update file size for collection assets')
-        collection_assets = CollectionAsset.objects.filter(file_size=0).all()
         for collection_asset in collection_assets:
             selected_bucket = select_s3_bucket(collection_asset.collection.name)
             s3 = get_s3_client(selected_bucket)
             bucket = settings.AWS_SETTINGS[selected_bucket.name]['S3_BUCKET_NAME']
             key = SharedAssetUploadBase.get_path(None, collection_asset)
+
             try:
                 file_size = s3.head_object(Bucket=bucket, Key=key)['ContentLength']
                 collection_asset.file_size = file_size
                 collection_asset.save()
+                print(".", end="", flush=True)
             except ClientError:
                 logger.error(
-                    'file size could not be read from s3 bucket for collection asset %s', key
+                    'file size could not be read from s3 bucket [%s] for collection asset %s',
+                    bucket, key
                 )
 
+        print()
         self.print_success('Update completed')
 
 
@@ -57,5 +80,15 @@ class Command(BaseCommand):
     help = """Requests the file size of every asset / collection asset from the s3 bucket and
     updates the value in the database"""
 
+    def add_arguments(self, parser: CommandParser) -> None:
+        super().add_arguments(parser)
+        parser.add_argument(
+            '-c',
+            '--count',
+            help="The amount of assets to process at once",
+            required=True,
+            type=int
+        )
+
     def handle(self, *args, **options):
         Handler(self, options).update()

From 8c45f9ce18b5986385d03bd168ec3234be28cc84 Mon Sep 17 00:00:00 2001
From: Stefan Heinemann
Date: Mon, 9 Dec 2024 17:47:15 +0100
Subject: [PATCH 2/2] PB-1091 Change update file size boto3 debug level

---
 app/stac_api/management/commands/update_asset_file_size.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/app/stac_api/management/commands/update_asset_file_size.py b/app/stac_api/management/commands/update_asset_file_size.py
index 4661fa4d..8e2b03d3 100644
--- a/app/stac_api/management/commands/update_asset_file_size.py
+++ b/app/stac_api/management/commands/update_asset_file_size.py
@@ -15,6 +15,10 @@
 
 logger = logging.getLogger(__name__)
 
+# increase the log level so boto3 doesn't spam the output
+logging.getLogger('boto3').setLevel(logging.WARNING)
+logging.getLogger('botocore').setLevel(logging.WARNING)
+
 
 class Handler(CommandHandler):
 