From 8b65674e0a9f49e42332b7e25dc1b5f808082b19 Mon Sep 17 00:00:00 2001 From: Yohan Boniface Date: Wed, 11 Dec 2024 19:16:17 +0100 Subject: [PATCH] chore: make S3 dependencies optional --- docs/config/storage.md | 10 +- umap/management/commands/migrate_to_S3.py | 6 +- umap/settings/base.py | 4 +- umap/storage.py | 216 ---------------------- umap/storage/__init__.py | 3 + umap/storage/fs.py | 101 ++++++++++ umap/storage/s3.py | 61 ++++++ umap/storage/staticfiles.py | 64 +++++++ umap/tests/test_datalayer_s3.py | 2 +- umap/tests/test_statics.py | 2 +- 10 files changed, 241 insertions(+), 228 deletions(-) delete mode 100644 umap/storage.py create mode 100644 umap/storage/__init__.py create mode 100644 umap/storage/fs.py create mode 100644 umap/storage/s3.py create mode 100644 umap/storage/staticfiles.py diff --git a/docs/config/storage.md b/docs/config/storage.md index d0064e275..140c53fbc 100644 --- a/docs/config/storage.md +++ b/docs/config/storage.md @@ -10,7 +10,7 @@ This can be configured through the `STORAGES` settings. uMap will use three keys but by default uses a custom storage that will add hash to the filenames, to be sure they are not kept in any cache after a release - `data`, used to store the layers data. This one should follow the uMap needs, and currently - uMap provides only two options: `umap.storage.UmapFileSystem` and `umap.storage.UmapS3` + uMap provides only two options: `umap.storage.fs.FSDataStorage` and `umap.storage.s3.S3DataStorage` ## Default settings: @@ -22,10 +22,10 @@ STORAGES = { "BACKEND": "django.core.files.storage.FileSystemStorage", }, "data": { - "BACKEND": "umap.storage.UmapFileSystem", + "BACKEND": "umap.storage.fs.FSDataStorage", }, "staticfiles": { - "BACKEND": "umap.storage.UmapManifestStaticFilesStorage", + "BACKEND": "umap.storage.staticfiles.UmapManifestStaticFilesStorage", }, } ``` @@ -43,7 +43,7 @@ STORAGES = { "BACKEND": "django.core.files.storage.FileSystemStorage", }, "data": { - "BACKEND": "umap.storage.UmapS3", + "BACKEND": "umap.storage.s3.S3DataStorage", "OPTIONS": { "access_key": "xxx", "secret_key": "yyy", @@ -53,7 +53,7 @@ STORAGES = { }, }, "staticfiles": { - "BACKEND": "umap.storage.UmapManifestStaticFilesStorage", + "BACKEND": "umap.storage.staticfiles.UmapManifestStaticFilesStorage", }, } ``` diff --git a/umap/management/commands/migrate_to_S3.py b/umap/management/commands/migrate_to_S3.py index a1ccd9bb3..b29135a9a 100644 --- a/umap/management/commands/migrate_to_S3.py +++ b/umap/management/commands/migrate_to_S3.py @@ -2,7 +2,7 @@ from django.core.management.base import BaseCommand from umap.models import DataLayer -from umap.storage import UmapFileSystem +from umap.storage.fs import FSDataStorage class Command(BaseCommand): @@ -11,9 +11,9 @@ class Command(BaseCommand): def handle(self, *args, **options): assert settings.UMAP_READONLY, "You must run that script with a read-only uMap." 
         assert (
-            settings.STORAGES["data"]["BACKEND"] == "umap.storage.UmapS3"
+            settings.STORAGES["data"]["BACKEND"] == "umap.storage.s3.S3DataStorage"
         ), "You must configure your storages to point to S3"
-        fs_storage = UmapFileSystem()
+        fs_storage = FSDataStorage()
         for datalayer in DataLayer.objects.all():
             geojson_fs_path = str(datalayer.geojson)
             try:
diff --git a/umap/settings/base.py b/umap/settings/base.py
index c4f59096e..f47ad2361 100644
--- a/umap/settings/base.py
+++ b/umap/settings/base.py
@@ -176,10 +176,10 @@
         "BACKEND": "django.core.files.storage.FileSystemStorage",
     },
     "data": {
-        "BACKEND": "umap.storage.UmapFileSystem",
+        "BACKEND": "umap.storage.fs.FSDataStorage",
     },
     "staticfiles": {
-        "BACKEND": "umap.storage.UmapManifestStaticFilesStorage",
+        "BACKEND": "umap.storage.staticfiles.UmapManifestStaticFilesStorage",
     },
 }
 # Add application/json and application/geo+json to default django-storages setting
diff --git a/umap/storage.py b/umap/storage.py
deleted file mode 100644
index 26cd84d5b..000000000
--- a/umap/storage.py
+++ /dev/null
@@ -1,216 +0,0 @@
-import operator
-import os
-import shutil
-import time
-from gzip import GzipFile
-from pathlib import Path
-
-from botocore.exceptions import ClientError
-from django.conf import settings
-from django.contrib.staticfiles.storage import ManifestStaticFilesStorage
-from django.core.files.storage import FileSystemStorage
-from rcssmin import cssmin
-from rjsmin import jsmin
-from storages.backends.s3 import S3Storage
-
-
-class UmapManifestStaticFilesStorage(ManifestStaticFilesStorage):
-    support_js_module_import_aggregation = True
-    max_post_process_passes = 15
-
-    # We remove `;` at the end of all regexps to match our biome config.
-    _js_module_import_aggregation_patterns = (
-        "*.js",
-        (
-            (
-                (
-                    r"""(?P<matched>import(?s:(?P<import>[\s\{].*?))"""
-                    r"""\s*from\s*['"](?P<module>[\.\/].*?)["']\s*)"""
-                ),
-                'import%(import)s from "%(url)s"\n',
-            ),
-            (
-                (
-                    r"""(?P<matched>export(?s:(?P<exports>[\s\{].*?))"""
-                    r"""\s*from\s*["'](?P<module>[\.\/].*?)["']\s*)"""
-                ),
-                'export%(exports)s from "%(url)s"\n',
-            ),
-            (
-                r"""(?P<matched>import\s*['"](?P<module>[\.\/].*?)["']\s*)""",
-                'import"%(url)s"\n',
-            ),
-            (
-                r"""(?P<matched>import\(["'](?P<module>.*?)["']\)\.then)""",
-                """import("%(url)s").then""",
-            ),
-            (
-                r"""(?P<matched>await import\(["'](?P<module>.*?)["']\))""",
-                """await import("%(url)s")""",
-            ),
-        ),
-    )
-
-    def post_process(self, paths, **options):
-        collected = super().post_process(paths, **options)
-        for original_path, processed_path, processed in collected:
-            if isinstance(processed, Exception):
-                print("Error with file", original_path)
-                raise processed
-            if processed_path.endswith(".js"):
-                path = Path(settings.STATIC_ROOT) / processed_path
-                initial = path.read_text()
-                if "sourceMappingURL" not in initial:  # Already minified.
-                    minified = jsmin(initial)
-                    path.write_text(minified)
-            if processed_path.endswith(".css"):
-                path = Path(settings.STATIC_ROOT) / processed_path
-                initial = path.read_text()
-                if "sourceMappingURL" not in initial:  # Already minified.
- minified = cssmin(initial) - path.write_text(minified) - yield original_path, processed_path, True - - -class UmapS3(S3Storage): - gzip = True - - def get_reference_version(self, instance): - metadata = self.connection.meta.client.head_object( - Bucket=self.bucket_name, Key=instance.geojson.name - ) - # Do not fail if bucket does not handle versioning - return metadata.get("VersionId", metadata["ETag"]) - - def make_filename(self, instance): - return f"{str(instance.pk)}.geojson" - - def list_versions(self, instance): - response = self.connection.meta.client.list_object_versions( - Bucket=self.bucket_name, Prefix=instance.geojson.name - ) - return [ - { - "ref": version["VersionId"], - "at": version["LastModified"].timestamp() * 1000, - "size": version["Size"], - } - for version in response["Versions"] - ] - - def get_version(self, ref, instance): - try: - data = self.connection.meta.client.get_object( - Bucket=self.bucket_name, - Key=instance.geojson.name, - VersionId=ref, - ) - except ClientError: - raise ValueError(f"Invalid version reference: {ref}") - return GzipFile(mode="r", fileobj=data["Body"]).read() - - def get_version_path(self, ref, instance): - return self.url(instance.geojson.name, parameters={"VersionId": ref}) - - def onDatalayerSave(self, instance): - pass - - def onDatalayerDelete(self, instance): - return self.connection.meta.client.delete_object( - Bucket=self.bucket_name, - Key=instance.geojson.name, - ) - - -class UmapFileSystem(FileSystemStorage): - def get_reference_version(self, instance): - return self._extract_version_ref(instance.geojson.name) - - def make_filename(self, instance): - root = self._base_path(instance) - name = "%s_%s.geojson" % (instance.pk, int(time.time() * 1000)) - return root / name - - def list_versions(self, instance): - root = self._base_path(instance) - names = self.listdir(root)[1] - names = [name for name in names if self._is_valid_version(name, instance)] - versions = [self._version_metadata(name, instance) for name in names] - versions.sort(reverse=True, key=operator.itemgetter("at")) - return versions - - def get_version(self, ref, instance): - with self.open(self.get_version_path(ref, instance), "r") as f: - return f.read() - - def get_version_path(self, ref, instance): - base_path = Path(settings.MEDIA_ROOT) / self._base_path(instance) - fullpath = base_path / f"{instance.pk}_{ref}.geojson" - if instance.old_id and not fullpath.exists(): - fullpath = base_path / f"{instance.old_id}_{ref}.geojson" - if not fullpath.exists(): - raise ValueError(f"Invalid version reference: {ref}") - return fullpath - - def onDatalayerSave(self, instance): - self._purge_gzip(instance) - self._purge_old_versions(instance, keep=settings.UMAP_KEEP_VERSIONS) - - def onDatalayerDelete(self, instance): - self._purge_gzip(instance) - self._purge_old_versions(instance, keep=None) - - def _extract_version_ref(self, path): - version = path.split(".")[0] - if "_" in version: - return version.split("_")[-1] - return version - - def _base_path(self, instance): - path = ["datalayer", str(instance.map.pk)[-1]] - if len(str(instance.map.pk)) > 1: - path.append(str(instance.map.pk)[-2]) - path.append(str(instance.map.pk)) - return Path(os.path.join(*path)) - - def _is_valid_version(self, name, instance): - valid_prefixes = [name.startswith("%s_" % instance.pk)] - if instance.old_id: - valid_prefixes.append(name.startswith("%s_" % instance.old_id)) - return any(valid_prefixes) and name.endswith(".geojson") - - def _version_metadata(self, name, instance): - ref = 
self._extract_version_ref(name) - return { - "name": name, - "ref": ref, - "at": ref, - "size": self.size(self._base_path(instance) / name), - } - - def _purge_old_versions(self, instance, keep=None): - root = self._base_path(instance) - versions = self.list_versions(instance) - if keep is not None: - versions = versions[keep:] - for version in versions: - name = version["name"] - # Should not be in the list, but ensure to not delete the file - # currently used in database - if keep is not None and instance.geojson.name.endswith(name): - continue - try: - self.delete(root / name) - except FileNotFoundError: - pass - - def _purge_gzip(self, instance): - root = self._base_path(instance) - names = self.listdir(root)[1] - prefixes = [f"{instance.pk}_"] - if instance.old_id: - prefixes.append(f"{instance.old_id}_") - prefixes = tuple(prefixes) - for name in names: - if name.startswith(prefixes) and name.endswith(".gz"): - self.delete(root / name) diff --git a/umap/storage/__init__.py b/umap/storage/__init__.py new file mode 100644 index 000000000..b0d0f522e --- /dev/null +++ b/umap/storage/__init__.py @@ -0,0 +1,3 @@ +# Retrocompat + +from .staticfiles import UmapManifestStaticFilesStorage # noqa: F401 diff --git a/umap/storage/fs.py b/umap/storage/fs.py new file mode 100644 index 000000000..39ee1bbd9 --- /dev/null +++ b/umap/storage/fs.py @@ -0,0 +1,101 @@ +import operator +import os +import time +from pathlib import Path + +from django.conf import settings +from django.core.files.storage import FileSystemStorage + + +class FSDataStorage(FileSystemStorage): + def get_reference_version(self, instance): + return self._extract_version_ref(instance.geojson.name) + + def make_filename(self, instance): + root = self._base_path(instance) + name = "%s_%s.geojson" % (instance.pk, int(time.time() * 1000)) + return root / name + + def list_versions(self, instance): + root = self._base_path(instance) + names = self.listdir(root)[1] + names = [name for name in names if self._is_valid_version(name, instance)] + versions = [self._version_metadata(name, instance) for name in names] + versions.sort(reverse=True, key=operator.itemgetter("at")) + return versions + + def get_version(self, ref, instance): + with self.open(self.get_version_path(ref, instance), "r") as f: + return f.read() + + def get_version_path(self, ref, instance): + base_path = Path(settings.MEDIA_ROOT) / self._base_path(instance) + fullpath = base_path / f"{instance.pk}_{ref}.geojson" + if instance.old_id and not fullpath.exists(): + fullpath = base_path / f"{instance.old_id}_{ref}.geojson" + if not fullpath.exists(): + raise ValueError(f"Invalid version reference: {ref}") + return fullpath + + def onDatalayerSave(self, instance): + self._purge_gzip(instance) + self._purge_old_versions(instance, keep=settings.UMAP_KEEP_VERSIONS) + + def onDatalayerDelete(self, instance): + self._purge_gzip(instance) + self._purge_old_versions(instance, keep=None) + + def _extract_version_ref(self, path): + version = path.split(".")[0] + if "_" in version: + return version.split("_")[-1] + return version + + def _base_path(self, instance): + path = ["datalayer", str(instance.map.pk)[-1]] + if len(str(instance.map.pk)) > 1: + path.append(str(instance.map.pk)[-2]) + path.append(str(instance.map.pk)) + return Path(os.path.join(*path)) + + def _is_valid_version(self, name, instance): + valid_prefixes = [name.startswith("%s_" % instance.pk)] + if instance.old_id: + valid_prefixes.append(name.startswith("%s_" % instance.old_id)) + return any(valid_prefixes) and 
name.endswith(".geojson") + + def _version_metadata(self, name, instance): + ref = self._extract_version_ref(name) + return { + "name": name, + "ref": ref, + "at": ref, + "size": self.size(self._base_path(instance) / name), + } + + def _purge_old_versions(self, instance, keep=None): + root = self._base_path(instance) + versions = self.list_versions(instance) + if keep is not None: + versions = versions[keep:] + for version in versions: + name = version["name"] + # Should not be in the list, but ensure to not delete the file + # currently used in database + if keep is not None and instance.geojson.name.endswith(name): + continue + try: + self.delete(root / name) + except FileNotFoundError: + pass + + def _purge_gzip(self, instance): + root = self._base_path(instance) + names = self.listdir(root)[1] + prefixes = [f"{instance.pk}_"] + if instance.old_id: + prefixes.append(f"{instance.old_id}_") + prefixes = tuple(prefixes) + for name in names: + if name.startswith(prefixes) and name.endswith(".gz"): + self.delete(root / name) diff --git a/umap/storage/s3.py b/umap/storage/s3.py new file mode 100644 index 000000000..c9dcb53e3 --- /dev/null +++ b/umap/storage/s3.py @@ -0,0 +1,61 @@ +from gzip import GzipFile + +from django.core.exceptions import ImproperlyConfigured + +try: + from botocore.exceptions import ClientError + from storages.backends.s3 import S3Storage +except ImportError: + raise ImproperlyConfigured( + "You need to install s3 dependencies: pip install umap-project[s3]" + ) + + +class S3DataStorage(S3Storage): + gzip = True + + def get_reference_version(self, instance): + metadata = self.connection.meta.client.head_object( + Bucket=self.bucket_name, Key=instance.geojson.name + ) + # Do not fail if bucket does not handle versioning + return metadata.get("VersionId", metadata["ETag"]) + + def make_filename(self, instance): + return f"{str(instance.pk)}.geojson" + + def list_versions(self, instance): + response = self.connection.meta.client.list_object_versions( + Bucket=self.bucket_name, Prefix=instance.geojson.name + ) + return [ + { + "ref": version["VersionId"], + "at": version["LastModified"].timestamp() * 1000, + "size": version["Size"], + } + for version in response["Versions"] + ] + + def get_version(self, ref, instance): + try: + data = self.connection.meta.client.get_object( + Bucket=self.bucket_name, + Key=instance.geojson.name, + VersionId=ref, + ) + except ClientError: + raise ValueError(f"Invalid version reference: {ref}") + return GzipFile(mode="r", fileobj=data["Body"]).read() + + def get_version_path(self, ref, instance): + return self.url(instance.geojson.name, parameters={"VersionId": ref}) + + def onDatalayerSave(self, instance): + pass + + def onDatalayerDelete(self, instance): + return self.connection.meta.client.delete_object( + Bucket=self.bucket_name, + Key=instance.geojson.name, + ) diff --git a/umap/storage/staticfiles.py b/umap/storage/staticfiles.py new file mode 100644 index 000000000..942956614 --- /dev/null +++ b/umap/storage/staticfiles.py @@ -0,0 +1,64 @@ +from pathlib import Path + +from django.conf import settings +from django.contrib.staticfiles.storage import ManifestStaticFilesStorage +from rcssmin import cssmin +from rjsmin import jsmin + + +class UmapManifestStaticFilesStorage(ManifestStaticFilesStorage): + support_js_module_import_aggregation = True + max_post_process_passes = 15 + + # We remove `;` at the end of all regexps to match our biome config. 
+    _js_module_import_aggregation_patterns = (
+        "*.js",
+        (
+            (
+                (
+                    r"""(?P<matched>import(?s:(?P<import>[\s\{].*?))"""
+                    r"""\s*from\s*['"](?P<module>[\.\/].*?)["']\s*)"""
+                ),
+                'import%(import)s from "%(url)s"\n',
+            ),
+            (
+                (
+                    r"""(?P<matched>export(?s:(?P<exports>[\s\{].*?))"""
+                    r"""\s*from\s*["'](?P<module>[\.\/].*?)["']\s*)"""
+                ),
+                'export%(exports)s from "%(url)s"\n',
+            ),
+            (
+                r"""(?P<matched>import\s*['"](?P<module>[\.\/].*?)["']\s*)""",
+                'import"%(url)s"\n',
+            ),
+            (
+                r"""(?P<matched>import\(["'](?P<module>.*?)["']\)\.then)""",
+                """import("%(url)s").then""",
+            ),
+            (
+                r"""(?P<matched>await import\(["'](?P<module>.*?)["']\))""",
+                """await import("%(url)s")""",
+            ),
+        ),
+    )
+
+    def post_process(self, paths, **options):
+        collected = super().post_process(paths, **options)
+        for original_path, processed_path, processed in collected:
+            if isinstance(processed, Exception):
+                print("Error with file", original_path)
+                raise processed
+            if processed_path.endswith(".js"):
+                path = Path(settings.STATIC_ROOT) / processed_path
+                initial = path.read_text()
+                if "sourceMappingURL" not in initial:  # Already minified.
+                    minified = jsmin(initial)
+                    path.write_text(minified)
+            if processed_path.endswith(".css"):
+                path = Path(settings.STATIC_ROOT) / processed_path
+                initial = path.read_text()
+                if "sourceMappingURL" not in initial:  # Already minified.
+                    minified = cssmin(initial)
+                    path.write_text(minified)
+            yield original_path, processed_path, True
diff --git a/umap/tests/test_datalayer_s3.py b/umap/tests/test_datalayer_s3.py
index fd044ac15..fdebb214a 100644
--- a/umap/tests/test_datalayer_s3.py
+++ b/umap/tests/test_datalayer_s3.py
@@ -27,7 +27,7 @@ def patch_storage():
     DataLayer.geojson.field.storage = storages.create_storage(
         {
-            "BACKEND": "umap.storage.UmapS3",
+            "BACKEND": "umap.storage.s3.S3DataStorage",
             "OPTIONS": {
                 "access_key": "testing",
                 "secret_key": "testing",
diff --git a/umap/tests/test_statics.py b/umap/tests/test_statics.py
index c29b57d71..97dc896ca 100644
--- a/umap/tests/test_statics.py
+++ b/umap/tests/test_statics.py
@@ -15,7 +15,7 @@ def staticfiles(settings):
     # Make sure settings are properly reset after the test
     settings.STORAGES = deepcopy(settings.STORAGES)
     settings.STORAGES["staticfiles"]["BACKEND"] = (
-        "umap.storage.UmapManifestStaticFilesStorage"
+        "umap.storage.staticfiles.UmapManifestStaticFilesStorage"
     )
     try:
         call_command("collectstatic", "--noinput")
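
With this split, the S3 backend lives in `umap/storage/s3.py` and only pulls in `botocore`/`django-storages` when that module is imported, raising `ImproperlyConfigured` otherwise. The snippet below is a minimal sketch, not part of the patch, of what that means for callers; it assumes a configured Django environment, and the fallback logic is illustrative only.

```python
# Illustrative only: umap.storage.s3 raises ImproperlyConfigured at import time
# when the optional dependencies are missing (install them with the extra
# suggested by the error message, e.g. `pip install umap-project[s3]`).
from django.core.exceptions import ImproperlyConfigured

try:
    from umap.storage.s3 import S3DataStorage as DataStorage
except ImproperlyConfigured:
    # Without the s3 extra, fall back to the filesystem backend.
    from umap.storage.fs import FSDataStorage as DataStorage

print("Data storage backend:", DataStorage.__name__)
```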
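Both `FSDataStorage` and `S3DataStorage` expose the same small interface (`get_reference_version`, `make_filename`, `list_versions`, `get_version`, `get_version_path`, `onDatalayerSave`, `onDatalayerDelete`), so either can sit behind `STORAGES["data"]`. A hedged sketch of how that interface is used, assuming an already-saved `DataLayer` instance named `datalayer`:

```python
# Sketch only: `datalayer` is assumed to be a saved umap.models.DataLayer.
from django.core.files.storage import storages

storage = storages["data"]  # FSDataStorage or S3DataStorage, per STORAGES["data"]

ref = storage.get_reference_version(datalayer)  # reference of the current geojson
versions = storage.list_versions(datalayer)     # [{"ref": ..., "at": ..., "size": ...}, ...]
latest = storage.get_version(versions[0]["ref"], datalayer)  # raw geojson content

storage.onDatalayerSave(datalayer)    # fs: purge gzip and old versions; s3: no-op
storage.onDatalayerDelete(datalayer)  # fs: purge version files; s3: delete the object
```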