From ec4d937ced9adff351e3ec452528f84bcd20da2c Mon Sep 17 00:00:00 2001 From: Sven Marnach Date: Wed, 10 Jul 2024 13:05:07 +0200 Subject: [PATCH] bug-1906959: Allow GCS buckets to be behind a CDN. --- docker-compose.yml | 10 ++++++++++ docker/config/local_dev.env | 1 + docker/images/gcs-cdn/Dockerfile | 2 ++ docker/images/gcs-cdn/default.conf | 8 ++++++++ tecken/download/views.py | 5 +++-- tecken/ext/gcs/storage.py | 11 ++++++++++- tecken/settings.py | 6 ++++++ tecken/tests/conftest.py | 9 +++++++-- tecken/tests/test_storage_backends.py | 15 +++++++++------ tecken/tests/utils.py | 2 +- 10 files changed, 57 insertions(+), 12 deletions(-) create mode 100644 docker/images/gcs-cdn/Dockerfile create mode 100644 docker/images/gcs-cdn/default.conf diff --git a/docker-compose.yml b/docker-compose.yml index 81ae2f16b..c2cf485e6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -168,6 +168,16 @@ services: interval: 1s timeout: 3s retries: 5 + depends_on: + - gcs-cdn + + # nginx as a reverse proxy simulating a CDN in front of the GCS emulator. + gcs-cdn: + build: + context: docker/images/gcs-cdn + image: local/tecken_gcs_cdn + ports: + - "${EXPOSE_CDN_PORT:-8002}:8002" # https://hub.docker.com/r/localstack/localstack/ # localstack running a fake AWS S3 diff --git a/docker/config/local_dev.env b/docker/config/local_dev.env index f970a53a3..3a2438934 100644 --- a/docker/config/local_dev.env +++ b/docker/config/local_dev.env @@ -33,6 +33,7 @@ DEBUG=true LOCAL_DEV_ENV=true CLOUD_SERVICE_PROVIDER=GCS UPLOAD_GCS_BUCKET=publicbucket +UPLOAD_GCS_PUBLIC_URL=http://gcs-cdn:8002/publicbucket UPLOAD_S3_BUCKET=publicbucket # Default to the test oidcprovider container for Open ID Connect diff --git a/docker/images/gcs-cdn/Dockerfile b/docker/images/gcs-cdn/Dockerfile new file mode 100644 index 000000000..7ab8af11e --- /dev/null +++ b/docker/images/gcs-cdn/Dockerfile @@ -0,0 +1,2 @@ +FROM nginx:1.27-alpine +COPY default.conf /etc/nginx/conf.d/default.conf diff --git a/docker/images/gcs-cdn/default.conf b/docker/images/gcs-cdn/default.conf new file mode 100644 index 000000000..6bc668a85 --- /dev/null +++ b/docker/images/gcs-cdn/default.conf @@ -0,0 +1,8 @@ +server { + listen 8002; + server_name cdn; + + location / { + proxy_pass http://gcs-emulator:8001; + } +} diff --git a/tecken/download/views.py b/tecken/download/views.py index 97a0ddf07..230ec69cb 100644 --- a/tecken/download/views.py +++ b/tecken/download/views.py @@ -156,13 +156,14 @@ def download_symbol(request, debugfilename, debugid, filename, try_symbols=False ) if metadata: url = metadata.download_url - if "http://localstack:4566" in url and request.get_host() == "localhost:8000": + if request.get_host() == "localhost:8000": # If doing local development, with Docker, you're most likely running # localstack as a fake S3. It runs on its own hostname that is only # available from other Docker containers. But to make it really convenient, # for testing symbol download we'll rewrite the URL to one that is possible # to reach from the host. - url = url.replace("localstack:4566", "localhost:4566") + url = url.replace("http://gcs-cdn:8002/", "http://localhost:8002/") + url = url.replace("http://localstack:4566/", "http://localhost:4566/") response = http.HttpResponseRedirect(url) if request._request_debug: response["Debug-Time"] = elapsed_time diff --git a/tecken/ext/gcs/storage.py b/tecken/ext/gcs/storage.py index f732cfde0..22c3599e4 100644 --- a/tecken/ext/gcs/storage.py +++ b/tecken/ext/gcs/storage.py @@ -27,11 +27,16 @@ def __init__( prefix: str, try_symbols: bool = False, endpoint_url: Optional[str] = None, + public_url: Optional[str] = None, ): self.bucket = bucket self.prefix = prefix self.try_symbols = try_symbols self.endpoint_url = endpoint_url + if public_url: + self.public_url = public_url.removesuffix("/") + else: + self.public_url = None self.clients = threading.local() # The Cloud Storage client doesn't support setting global timeouts for all requests, so we # need to pass the timeout for every single request. the default timeout is 60 seconds for @@ -106,8 +111,12 @@ def get_object_metadata(self, key: str) -> Optional[ObjectMetadata]: original_content_length = int(original_content_length) except ValueError: original_content_length = None + if self.public_url: + download_url = f"{self.public_url}/{quote(gcs_key)}" + else: + download_url = blob.public_url metadata = ObjectMetadata( - download_url=blob.public_url, + download_url=download_url, content_type=blob.content_type, content_length=blob.size, content_encoding=blob.content_encoding, diff --git a/tecken/settings.py b/tecken/settings.py index 62a9e2b09..20a4a0484 100644 --- a/tecken/settings.py +++ b/tecken/settings.py @@ -577,6 +577,10 @@ def filter(self, record): "UPLOAD_GCS_BUCKET", doc="The GCS bucket name for uploads and downloads.", ) + UPLOAD_GCS_PUBLIC_URL = _config( + "UPLOAD_GCS_PUBLIC_URL", + doc="The base URL for downloading files from the upload bucket.", + ) DOWNLOAD_S3_BUCKET = _config( "DOWNLOAD_S3_BUCKET", raise_error=False, @@ -596,6 +600,7 @@ def filter(self, record): "bucket": UPLOAD_GCS_BUCKET, "prefix": "v1", "try_symbols": False, + "public_url": UPLOAD_GCS_PUBLIC_URL, }, } TRY_UPLOAD_BACKEND = { @@ -604,6 +609,7 @@ def filter(self, record): "bucket": UPLOAD_GCS_BUCKET, "prefix": "try/v1", "try_symbols": True, + "public_url": UPLOAD_GCS_PUBLIC_URL, }, } DOWNLOAD_BACKENDS = [] diff --git a/tecken/tests/conftest.py b/tecken/tests/conftest.py index 5c83f5ca5..a31911d85 100644 --- a/tecken/tests/conftest.py +++ b/tecken/tests/conftest.py @@ -151,19 +151,24 @@ def get_storage_backend(bucket_name): """Return a function to create a unique storage backend for the current test.""" def _get_storage_backend( - kind: Literal["gcs", "s3"], try_symbols: bool = False + kind: Literal["gcs", "gcs-cdn", "s3"], try_symbols: bool = False ) -> StorageBackend: prefix = "try/" * try_symbols + "v1" match kind: case "gcs": return GCSStorage(bucket_name, prefix, try_symbols) + case "gcs-cdn": + public_url = f"http://gcs-cdn:8002/{bucket_name}" + return GCSStorage( + bucket_name, prefix, try_symbols, public_url=public_url + ) case "s3": return S3Storage(bucket_name, prefix, try_symbols) return _get_storage_backend -@pytest.fixture(params=["gcs", "s3"]) +@pytest.fixture(params=["gcs", "gcs-cdn", "s3"]) def symbol_storage_no_create(request, get_storage_backend): """Replace the global SymbolStorage instance with a new instance. diff --git a/tecken/tests/test_storage_backends.py b/tecken/tests/test_storage_backends.py index 8587a3db0..5ff0f336b 100644 --- a/tecken/tests/test_storage_backends.py +++ b/tecken/tests/test_storage_backends.py @@ -12,10 +12,13 @@ from tecken.tests.utils import Upload, UPLOADS +@pytest.mark.parametrize("try_storage", [False, True]) @pytest.mark.parametrize("upload", UPLOADS.values(), ids=UPLOADS.keys()) -@pytest.mark.parametrize("storage_kind", ["gcs", "s3"]) -def test_upload_and_download(get_storage_backend, storage_kind: str, upload: Upload): - backend = get_storage_backend(storage_kind) +@pytest.mark.parametrize("storage_kind", ["gcs", "gcs-cdn", "s3"]) +def test_upload_and_download( + get_storage_backend, storage_kind: str, upload: Upload, try_storage: bool +): + backend = get_storage_backend(storage_kind, try_storage) backend.clear() assert backend.exists() @@ -35,20 +38,20 @@ def test_upload_and_download(get_storage_backend, storage_kind: str, upload: Upl assert metadata.original_md5_sum == upload.metadata.original_md5_sum -@pytest.mark.parametrize("storage_kind", ["gcs", "s3"]) +@pytest.mark.parametrize("storage_kind", ["gcs", "gcs-cdn", "s3"]) def test_non_exsiting_bucket(get_storage_backend, storage_kind: str): backend = get_storage_backend(storage_kind) assert not backend.exists() -@pytest.mark.parametrize("storage_kind", ["gcs", "s3"]) +@pytest.mark.parametrize("storage_kind", ["gcs", "gcs-cdn", "s3"]) def test_storageerror_msg(get_storage_backend, storage_kind: str): backend = get_storage_backend(storage_kind) error = StorageError("storage error message", backend=backend) assert repr(backend) in str(error) -@pytest.mark.parametrize("storage_kind", ["gcs", "s3"]) +@pytest.mark.parametrize("storage_kind", ["gcs", "gcs-cdn", "s3"]) def test_s3_download_url(bucket_name: str, get_storage_backend, storage_kind: str): backend = get_storage_backend(storage_kind) backend.clear() diff --git a/tecken/tests/utils.py b/tecken/tests/utils.py index ec089f4d4..4f6f457b9 100644 --- a/tecken/tests/utils.py +++ b/tecken/tests/utils.py @@ -35,7 +35,7 @@ class Upload: backend: Optional[StorageBackend] = None @property - def key(self): + def key(self) -> str: return SymbolStorage.make_key(self.debug_file, self.debug_id, self.sym_file) @classmethod