Implement persistent bucket fixtures for integration tests #301

Merged Sep 30, 2024 (18 commits)
1 change: 1 addition & 0 deletions changelog.d/+persistent_bucket.added.md
@@ -0,0 +1 @@
Add persistent bucket fixtures for integration tests.
1 change: 1 addition & 0 deletions changelog.d/+persistent_bucket_aggregate.added.md
@@ -0,0 +1 @@
Introduce PersistentBucketAggregate class to manage bucket name and subfolder.


users won't care about these details in the CLI tool changelog

btw, what is your plan for the SDK? you started with the CLI, but won't it be harder to apply the same thing to the SDK?
I could see these changelog messages making sense if they were part of the public API of the SDK that is used here.

Author: Wasn't aware the SDK was part of the same ticket; will proceed with it.


yeah, I wouldn't rely on the ticket's description much, more on what makes sense; fixing a problem in one place while an identical one exists in another doesn't make sense.

1 change: 1 addition & 0 deletions changelog.d/+persistent_bucket_utils.added.md
@@ -0,0 +1 @@
Add utility functions for managing persistent buckets.
1 change: 1 addition & 0 deletions changelog.d/+update_integration_tests.changed.md
@@ -0,0 +1 @@
Update integration tests to use persistent buckets.


seems like a duplicate

IMO, as they are right now, all of these would go under our "infrastructure" change category, i.e. not something that typical users care about, since this is not what they get with the shipped version of the CLI tool.

10 changes: 8 additions & 2 deletions test/integration/cleanup_buckets.py
@@ -7,10 +7,16 @@
 # License https://www.backblaze.com/using_b2_code.html
 #
 ######################################################################
+from .persistent_bucket import get_or_create_persistent_bucket
+
 
 def test_cleanup_buckets(b2_api):
     # this is not a test, but it is intended to be called
     # via pytest because it reuses fixtures which have everything
-    # set up
-    pass  # b2_api calls b2_api.clean_buckets() in its finalizer
+    # set up.
+    # The persistent bucket is cleared manually now and not
+    # when tests tear down, as otherwise we'd lose the main benefit
+    # of a persistent bucket, whose identity is shared across tests.
+    persistent_bucket = get_or_create_persistent_bucket(b2_api)
+    b2_api.clean_bucket(persistent_bucket)


won't this break concurrently run tests?


This is why lifecycle rules were suggested for cleanup instead in the first place.

Author: It cleans the buckets once, before the tests run.


ok, so what will happen if someone opens a second PR while one is being tested?

Author: the same thing that would happen if no changes were introduced; such a scenario disrupts the test bucket lifecycle, persistent or not. The question is, is it frequent enough to warrant addressing?


The bucket cleanup process only removed stale buckets, not all of them, so we previously did support concurrent GHA jobs; this change breaks that, and for no reason AFAIK, since the solution is simply to leave that bucket alone forever.
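
For reference, the lifecycle-rule approach mentioned above pushes cleanup onto the B2 service itself, so no test run ever deletes files another run may still need. A minimal sketch, assuming the same b2_api wrapper used in this suite (the bucket name and rule values are illustrative; the PR adopts similar rules below):

# Illustrative only: let B2 expire test files on its own schedule
# instead of deleting them in-band during cleanup.
bucket = b2_api.api.create_bucket(
    "persistent-test-bucket",  # hypothetical name
    bucket_type="allPrivate",
    lifecycle_rules=[
        {
            "daysFromUploadingToHiding": 1,  # hide files a day after upload
            "daysFromHidingToDeleting": 1,   # purge them a day after hiding
            "fileNamePrefix": "",            # apply to every file in the bucket
        }
    ],
)

With rules like these in place, the bucket itself can be left alone forever, which is what keeps concurrent CI jobs safe.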

b2_api.api.list_buckets()


what is this call here for?

Author: overlooked, redundant

38 changes: 38 additions & 0 deletions test/integration/conftest.py
@@ -16,11 +16,14 @@
 import subprocess
 import sys
 import tempfile
+import uuid
+from contextlib import suppress
 from os import environ, path
 from tempfile import TemporaryDirectory
 
 import pytest
 from b2sdk.v2 import B2_ACCOUNT_INFO_ENV_VAR, XDG_CONFIG_HOME_ENV_VAR, Bucket
+from b2sdk.v2.exception import NonExistentBucket
 
 from b2._internal.version_listing import (
     CLI_VERSIONS,
@@ -31,6 +34,11 @@
 
 from ..helpers import b2_uri_args_v3, b2_uri_args_v4
 from .helpers import NODE_DESCRIPTION, RNG_SEED, Api, CommandLine, bucket_name_part, random_token
+from .persistent_bucket import (
+    PersistentBucketAggregate,
+    delete_files,
+    get_or_create_persistent_bucket,
+)
 
 logger = logging.getLogger(__name__)
@@ -402,3 +410,33 @@ def b2_uri_args(apiver_int):
         return b2_uri_args_v4
     else:
         return b2_uri_args_v3
+
+
+# -- Persistent bucket fixtures --
+@pytest.fixture
+def persistent_bucket(b2_api) -> Bucket:
+    return get_or_create_persistent_bucket(b2_api)
+
+
+@pytest.fixture
+def unique_subfolder():
+    subfolder = f"test-{uuid.uuid4().hex[:8]}"
+    yield subfolder
+
+
+@pytest.fixture
+def persistent_bucket_aggregate(
+    persistent_bucket, unique_subfolder, b2_api
+) -> PersistentBucketAggregate:
+    """
+    Since all consumers of the `bucket_name` fixture expect a new bucket to be created,
+    we need to mirror this behavior by appending a unique subfolder to the persistent bucket name.
+    """
+    persistent_bucket_aggregate = PersistentBucketAggregate(
+        persistent_bucket.name, unique_subfolder
+    )
+    yield persistent_bucket_aggregate
+    # Clean up all files in the persistent bucket after each test
+    with suppress(NonExistentBucket):
+        bucket = b2_api.api.get_bucket_by_name(persistent_bucket_aggregate.bucket_name)
+        delete_files(bucket, persistent_bucket_aggregate.subfolder)
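
To show how these fixtures are meant to be consumed, here is a hypothetical test, not part of this PR; it assumes the suite's b2_tool CommandLine fixture and the classic upload-file syntax:

# Hypothetical consumer of the fixtures above (illustrative only).
def test_upload_is_isolated(b2_tool, persistent_bucket_aggregate, tmp_path):
    source = tmp_path / "example.txt"
    source.write_text("hello")
    # Files land under this test's unique subfolder, so concurrent tests
    # sharing the persistent bucket cannot collide with each other.
    b2_tool.should_succeed(
        [
            "upload-file",
            persistent_bucket_aggregate.bucket_name,
            str(source),
            f"{persistent_bucket_aggregate.subfolder}/example.txt",
        ]
    )

The teardown in persistent_bucket_aggregate then removes only that subfolder's files, leaving the shared bucket in place.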
5 changes: 2 additions & 3 deletions test/integration/helpers.py
@@ -188,7 +188,6 @@ def _should_remove_bucket(self, bucket: Bucket) -> tuple[bool, str]:
     def clean_buckets(self, quick=False):
         # even with use_cache=True, if cache is empty API call will be made
         buckets = self.api.list_buckets(use_cache=quick)
-        print('Total bucket count:', len(buckets))
         remaining_buckets = []
         for bucket in buckets:
             should_remove, why = self._should_remove_bucket(bucket)
@@ -539,9 +538,9 @@ def reauthorize(self, check_key_capabilities=False):
         } - private_preview_caps - set(auth_dict['allowed']['capabilities'])
         assert not missing_capabilities, f'it appears that the raw_api integration test is being run with a non-full key. Missing capabilities: {missing_capabilities}'
 
-    def list_file_versions(self, bucket_name):
+    def list_file_versions(self, bucket_name, path=''):
         return self.should_succeed_json(
-            ['ls', '--json', '--recursive', '--versions', *self.b2_uri_args(bucket_name)]
+            ['ls', '--json', '--recursive', '--versions', *self.b2_uri_args(bucket_name, path)]
         )
 
     def cleanup_buckets(self, buckets: dict[str, dict | None]) -> None:
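
The new path parameter lets callers scope version listings to a subfolder. A hypothetical call site, reusing the fixture from conftest.py:

# Hypothetical: list only the versions created under this test's subfolder.
versions = b2_tool.list_file_versions(
    persistent_bucket_aggregate.bucket_name,
    persistent_bucket_aggregate.subfolder,
)
assert len(versions) == 1  # illustrative check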
92 changes: 92 additions & 0 deletions test/integration/persistent_bucket.py
@@ -0,0 +1,92 @@
######################################################################
#
# File: test/integration/persistent_bucket.py
#
# Copyright 2024 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
import hashlib
import os
from dataclasses import dataclass
from functools import cached_property
from test.integration.helpers import BUCKET_NAME_LENGTH, Api

import backoff
from b2sdk.v2 import Bucket
from b2sdk.v2.exception import DuplicateBucketName, NonExistentBucket

PERSISTENT_BUCKET_NAME_PREFIX = "constst"


@dataclass
class PersistentBucketAggregate:
    bucket_name: str
    subfolder: str

    @cached_property
    def virtual_bucket_name(self):
        return f"{self.bucket_name}/{self.subfolder}"


@backoff.on_exception(backoff.expo, Exception, max_tries=3, max_time=10)
def delete_all_files(bucket: Bucket):
    all_items = list(bucket.ls(recursive=True))
    for item, _ in all_items:
        bucket.delete_file_version(item.id_, item.file_name)

what is the backoff here for? does the retry mechanism built into b2sdk not work properly?


@backoff.on_exception(backoff.expo, Exception, max_tries=3, max_time=10)
def delete_files(bucket: Bucket, subfolder: str):
    for file_version, _ in bucket.ls(recursive=True, folder_to_list=subfolder):
        bucket.delete_file_version(file_version.id_, file_version.file_name)


def cleanup_persistent_bucket(b2_api: Api):
    all_buckets = b2_api.api.list_buckets()
    for bucket in all_buckets:
        if bucket.name.startswith(PERSISTENT_BUCKET_NAME_PREFIX):
            print(f"Deleting all files in bucket {bucket.name}")
            delete_all_files(bucket)


def get_persistent_bucket_name(b2_api: Api) -> str:
    if "CI" in os.environ:
        # CI environment
        repo_id = os.environ.get("GITHUB_REPOSITORY_ID")
        if not repo_id:
            raise ValueError("GITHUB_REPOSITORY_ID is not set")
        bucket_hash = hashlib.sha256(repo_id.encode()).hexdigest()
    else:
        # Local development
        bucket_hash = hashlib.sha256(b2_api.account_id.encode()).hexdigest()

    return f"{PERSISTENT_BUCKET_NAME_PREFIX}-{bucket_hash}"[:BUCKET_NAME_LENGTH]

@mjurbanski-reef (Sep 17, 2024): I kinda like the source of the ID you used (especially the account id), but please note the tests are also run under Jenkins in the case of the staging environment. Probably best to simply test for GITHUB_REPOSITORY_ID presence and use account_id.
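
A sketch of that suggestion (not part of the diff): seed the hash from GITHUB_REPOSITORY_ID when present, falling back to the account id, with no hard failure on CI detection:

# Hypothetical variant implementing the reviewer's suggestion above.
def get_persistent_bucket_name(b2_api: Api) -> str:
    seed = os.environ.get("GITHUB_REPOSITORY_ID") or b2_api.account_id
    bucket_hash = hashlib.sha256(seed.encode()).hexdigest()
    return f"{PERSISTENT_BUCKET_NAME_PREFIX}-{bucket_hash}"[:BUCKET_NAME_LENGTH]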


@backoff.on_exception(
    backoff.expo,
    DuplicateBucketName,
    max_tries=3,
    jitter=backoff.full_jitter,
)
def get_or_create_persistent_bucket(b2_api: Api) -> Bucket:
    bucket_name = get_persistent_bucket_name(b2_api)
    try:
        bucket = b2_api.api.get_bucket_by_name(bucket_name)
    except NonExistentBucket:
        bucket = b2_api.api.create_bucket(
            bucket_name,
            bucket_type="allPublic",
            lifecycle_rules=[
                {
                    "daysFromHidingToDeleting": 1,
                    "daysFromUploadingToHiding": 14,
                    "fileNamePrefix": "",
                }
            ],
        )
    # add the new bucket name to the list of bucket names
    b2_api.bucket_name_log.append(bucket_name)
    return bucket
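
Taken together, the helpers in this file compose roughly like this (illustrative sketch; b2_api is the suite's configured Api wrapper):

# Illustrative composition of the helpers above (not part of the diff).
bucket = get_or_create_persistent_bucket(b2_api)   # stable, hash-named bucket
aggregate = PersistentBucketAggregate(bucket.name, "test-deadbeef")
print(aggregate.virtual_bucket_name)               # "<bucket_name>/test-deadbeef"
delete_files(bucket, aggregate.subfolder)          # clean up just this subfolder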