From 7745b7bb14f4a60d838920aae6afbe681f0299ed Mon Sep 17 00:00:00 2001 From: Markus Binsteiner Date: Tue, 30 Jan 2024 17:43:25 +0100 Subject: [PATCH] feat: make store type configurable --- src/kiara/context/config.py | 80 +++++++++++++++---- src/kiara/interfaces/python_api/__init__.py | 11 ++- src/kiara/registries/__init__.py | 57 ++++++++++--- src/kiara/registries/aliases/archives.py | 47 ++++++++++- src/kiara/registries/aliases/sqlite_store.py | 10 +-- .../registries/data/data_store/__init__.py | 19 ++++- .../data/data_store/filesystem_store.py | 45 +++++++++-- .../data/data_store/sqlite_store.py | 29 +++++-- .../jobs/job_store/filesystem_store.py | 44 +++++++++- .../registries/jobs/job_store/sqlite_store.py | 21 +++-- src/kiara/registries/workflows/archives.py | 44 ++++++++-- 11 files changed, 331 insertions(+), 76 deletions(-) diff --git a/src/kiara/context/config.py b/src/kiara/context/config.py index ca9f44638..13f133de7 100644 --- a/src/kiara/context/config.py +++ b/src/kiara/context/config.py @@ -8,7 +8,17 @@ import os import uuid from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Type, Union +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterable, + List, + Literal, + Mapping, + Type, + Union, +) import structlog from pydantic import BaseModel, ConfigDict, field_validator, model_validator @@ -362,6 +372,10 @@ def load_from_file(cls, path: Union[Path, str, None] = None) -> "KiaraConfig": description="The name of the default context to use if none is provided.", default=DEFAULT_CONTEXT_NAME, ) + default_store_type: Literal["sqlite", "filesystem"] = Field( + description="The default store type to use if not specified.", + default="filesystem", + ) auto_generate_contexts: bool = Field( description="Whether to auto-generate requested contexts if they don't exist yet.", default=True, @@ -534,35 +548,69 @@ def create_default_sqlite_archive_config() -> Dict[str, Any]: if DEFAULT_DATA_STORE_MARKER not in 
context_config.archives.keys(): - default_sqlite_config = create_default_sqlite_archive_config() - - data_store = KiaraArchiveConfig( - archive_type="sqlite_data_store", config=default_sqlite_config - ) + if self.default_store_type == "sqlite": + default_sqlite_config = create_default_sqlite_archive_config() + data_store = KiaraArchiveConfig( + archive_type="sqlite_data_store", config=default_sqlite_config + ) + elif self.default_store_type == "filesystem": + data_store_type = "filesystem_data_store" + data_store = create_default_store_config( + store_type=data_store_type, + stores_base_path=os.path.join(filesystem_base_path, "data"), + ) + else: + raise Exception( + f"Can't create default data store: invalid default store type '{self.default_store_type}'." + ) context_config.archives[DEFAULT_DATA_STORE_MARKER] = data_store changed = True if DEFAULT_JOB_STORE_MARKER not in context_config.archives.keys(): - if default_sqlite_config is None: - default_sqlite_config = create_default_sqlite_archive_config() + if self.default_store_type == "sqlite": - job_store = KiaraArchiveConfig( - archive_type="sqlite_job_store", config=default_sqlite_config - ) + if default_sqlite_config is None: + default_sqlite_config = create_default_sqlite_archive_config() + + job_store = KiaraArchiveConfig( + archive_type="sqlite_job_store", config=default_sqlite_config + ) + elif self.default_store_type == "filesystem": + job_store_type = "filesystem_job_store" + job_store = create_default_store_config( + store_type=job_store_type, + stores_base_path=os.path.join(filesystem_base_path, "jobs"), + ) + else: + raise Exception( + f"Can't create default job store: invalid default store type '{self.default_store_type}'." 
+ ) context_config.archives[DEFAULT_JOB_STORE_MARKER] = job_store changed = True if DEFAULT_ALIAS_STORE_MARKER not in context_config.archives.keys(): - if default_sqlite_config is None: - default_sqlite_config = create_default_sqlite_archive_config() + if self.default_store_type == "sqlite": - alias_store = KiaraArchiveConfig( - archive_type="sqlite_alias_store", config=default_sqlite_config - ) + if default_sqlite_config is None: + default_sqlite_config = create_default_sqlite_archive_config() + + alias_store = KiaraArchiveConfig( + archive_type="sqlite_alias_store", config=default_sqlite_config + ) + elif self.default_store_type == "filesystem": + alias_store_type = "filesystem_alias_store" + alias_store = create_default_store_config( + store_type=alias_store_type, + stores_base_path=os.path.join(filesystem_base_path, "aliases"), + ) + else: + raise Exception( + f"Can't create default alias store: invalid default store type '{self.default_store_type}'." + ) context_config.archives[DEFAULT_ALIAS_STORE_MARKER] = alias_store changed = True diff --git a/src/kiara/interfaces/python_api/__init__.py b/src/kiara/interfaces/python_api/__init__.py index 1381cbe4a..7f6b6f6ad 100644 --- a/src/kiara/interfaces/python_api/__init__.py +++ b/src/kiara/interfaces/python_api/__init__.py @@ -691,7 +691,9 @@ def create_operation( if not module_config: raise Exception("Pipeline configuration can't be empty.") assert module_config is None or isinstance(module_config, Mapping) - operation = create_operation("pipeline", operation_config=module_config) + operation = create_operation( + "pipeline", operation_config=module_config, kiara=self.context + ) return operation else: mc = Manifest(module_type=module_type, module_config=module_config) @@ -1680,7 +1682,7 @@ def store_value( def store_values( self, values: Mapping[str, Union[str, uuid.UUID, Value]], - alias_map: Mapping[str, Iterable[str]], + alias_map: Union[Mapping[str, Iterable[str]], None] = None, ) -> StoreValuesResult: """ 
Store multiple values into the (default) kiara value store. @@ -1697,7 +1699,10 @@ def store_values( """ result = {} for field_name, value in values.items(): - aliases = alias_map.get(field_name) + if alias_map: + aliases = alias_map.get(field_name) + else: + aliases = None value_obj = self.get_value(value) store_result = self.store_value(value=value_obj, alias=aliases) result[field_name] = store_result diff --git a/src/kiara/registries/__init__.py b/src/kiara/registries/__init__.py index 058465083..a49247bbd 100644 --- a/src/kiara/registries/__init__.py +++ b/src/kiara/registries/__init__.py @@ -7,10 +7,19 @@ import abc import os -import typing import uuid from pathlib import Path -from typing import TYPE_CHECKING, Generic, Iterable, Type, TypeVar, Union +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Generic, + Iterable, + Mapping, + Type, + TypeVar, + Union, +) import structlog from pydantic import BaseModel, ConfigDict, Field @@ -56,10 +65,17 @@ class ArchiveDetails(BaseModel): ) +class ArchiveMetadta(BaseModel): + + archive_id: Union[uuid.UUID, None] = Field( + description="The id of the stored archive.", default=None + ) + + NON_ARCHIVE_DETAILS = ArchiveDetails() -class KiaraArchive(abc.ABC, typing.Generic[ARCHIVE_CONFIG_CLS]): +class KiaraArchive(abc.ABC, Generic[ARCHIVE_CONFIG_CLS]): _config_cls: Type[ARCHIVE_CONFIG_CLS] = None # type: ignore @@ -89,7 +105,7 @@ class KiaraArchive(abc.ABC, typing.Generic[ARCHIVE_CONFIG_CLS]): @classmethod def _load_store_config( cls, store_uri: str, allow_write_access: bool, **kwargs - ) -> Union[typing.Dict[str, typing.Any], None]: + ) -> Union[Dict[str, Any], None]: """Tries to assemble an archive config from an uri (and optional paramters). 
If the archive type supports the archive at the uri, then a valid config will be returned, @@ -101,7 +117,7 @@ def _load_store_config( @classmethod def load_store_config( cls, store_uri: str, allow_write_access: bool, **kwargs - ) -> Union[typing.Dict[str, typing.Any], None]: + ) -> Union[Dict[str, Any], None]: log_message( "attempt_loading_existing_store", @@ -140,7 +156,17 @@ def __init__( self._archive_alias: str = archive_alias self._config: ARCHIVE_CONFIG_CLS = archive_config self._force_read_only: bool = force_read_only - self._archive_id: Union[uuid.UUID, None] = None + + self._archive_metadata: Union[Mapping[str, Any], None] = None + + @property + def archive_metadata(self) -> ArchiveMetadta: + + if self._archive_metadata is None: + archive_metadata = self._retrieve_archive_metadata() + self._archive_metadata = ArchiveMetadta(**archive_metadata) + + return self._archive_metadata @classmethod @abc.abstractmethod @@ -156,6 +182,16 @@ def _is_writeable(cls) -> bool: def register_archive(self, kiara: "Kiara"): pass + @abc.abstractmethod + def _retrieve_archive_metadata(self) -> Mapping[str, Any]: + """Retrieve metadata for the archive. + + Must contain at least one key 'archive_id', with a uuid-able value that + uniquely identifies the archive. 
+ """ + + raise NotImplementedError() + @property def archive_alias(self) -> str: return self._archive_alias @@ -175,15 +211,10 @@ def is_writeable(self) -> bool: # def register_archive(self, kiara: "Kiara"): # pass - @abc.abstractmethod - def _retrieve_archive_id(self) -> uuid.UUID: - raise NotImplementedError() - @property def archive_id(self) -> uuid.UUID: - if self._archive_id is None: - self._archive_id = self._retrieve_archive_id() - return self._archive_id + + return self.archive_metadata.archive_id @property def config(self) -> ARCHIVE_CONFIG_CLS: diff --git a/src/kiara/registries/aliases/archives.py b/src/kiara/registries/aliases/archives.py index e4738411b..19655f613 100644 --- a/src/kiara/registries/aliases/archives.py +++ b/src/kiara/registries/aliases/archives.py @@ -9,7 +9,9 @@ import shutil import uuid from pathlib import Path -from typing import Mapping, Set, Union +from typing import Any, Mapping, Set, Union + +from orjson import orjson from kiara.registries import ARCHIVE_CONFIG_CLS, FileSystemArchiveConfig from kiara.registries.aliases import AliasArchive, AliasStore @@ -21,11 +23,44 @@ class FileSystemAliasArchive(AliasArchive): _archive_type_name = "filesystem_alias_archive" _config_cls = FileSystemArchiveConfig # type: ignore - def __init__(self, archive_id: uuid.UUID, config: ARCHIVE_CONFIG_CLS): - - super().__init__(archive_id=archive_id, config=config) + def __init__( + self, + archive_alias: str, + archive_config: ARCHIVE_CONFIG_CLS, + force_read_only: bool = False, + ): + + super().__init__( + archive_alias=archive_alias, + archive_config=archive_config, + force_read_only=force_read_only, + ) self._base_path: Union[Path, None] = None + self._archive_metadata: Union[Mapping[str, Any], None] = None + + def _retrieve_archive_metadata(self) -> Mapping[str, Any]: + + if self._archive_metadata is not None: + return self._archive_metadata + + if not self.archive_metadata_path.is_file(): + _archive_metadata = {} + else: + _archive_metadata = 
orjson.loads(self.archive_metadata_path.read_bytes()) + + archive_id = _archive_metadata.get("archive_id", None) + if not archive_id: + try: + _archive_id = uuid.UUID(self.alias_store_path.name) + _archive_metadata["archive_id"] = _archive_id + except Exception: + raise Exception( + f"Could not retrieve archive id for alias archive '{self.archive_alias}'." + ) + + self._archive_metadata = _archive_metadata + return self._archive_metadata @property def alias_store_path(self) -> Path: @@ -38,6 +73,10 @@ def alias_store_path(self) -> Path: self._base_path.mkdir(parents=True, exist_ok=True) return self._base_path + @property + def archive_metadata_path(self) -> Path: + return self.alias_store_path / "store_metadata.json" + @property def aliases_path(self) -> Path: return self.alias_store_path / "aliases" diff --git a/src/kiara/registries/aliases/sqlite_store.py b/src/kiara/registries/aliases/sqlite_store.py index 6e6ad9805..f6bacea0c 100644 --- a/src/kiara/registries/aliases/sqlite_store.py +++ b/src/kiara/registries/aliases/sqlite_store.py @@ -54,15 +54,13 @@ def __init__(self, archive_alias: str, archive_config: SqliteArchiveConfig): self._cached_engine: Union[Engine, None] = None # self._lock: bool = True - def _retrieve_archive_id(self) -> uuid.UUID: - sql = text("SELECT value FROM archive_metadata WHERE key='archive_id'") + def _retrieve_archive_metadata(self) -> Mapping[str, Any]: + + sql = text("SELECT key, value FROM archive_metadata") with self.sqlite_engine.connect() as connection: result = connection.execute(sql) - row = result.fetchone() - if row is None: - raise Exception("No archive ID found in metadata") - return uuid.UUID(row[0]) + return {row[0]: row[1] for row in result} @property def sqlite_path(self): diff --git a/src/kiara/registries/data/data_store/__init__.py b/src/kiara/registries/data/data_store/__init__.py index 3cb428ee8..04c1fe0f7 100644 --- a/src/kiara/registries/data/data_store/__init__.py +++ 
b/src/kiara/registries/data/data_store/__init__.py @@ -9,7 +9,7 @@ import typing import uuid from io import BytesIO -from typing import TYPE_CHECKING, Any, Dict, Iterable, Mapping, Set, Union +from typing import TYPE_CHECKING, Any, Dict, Iterable, Mapping, Set, Tuple, Union import structlog from rich.console import RenderableType @@ -390,7 +390,7 @@ def store_value(self, value: Value) -> PersistedData: return persisted_value @abc.abstractmethod - def _persist_chunk(self, chunk_id: str, chunk: Union[str, BytesIO]): + def _persist_chunks(self, chunks: typing.Iterator[Tuple[str, BytesIO]]): """Persist the specified chunk, and return the chunk id. If the chunk is a string, it represents a local file path, otherwise it is a BytesIO instance representing the actual data of the chunk. @@ -401,6 +401,8 @@ def _persist_value_data(self, value: Value) -> PersistedData: serialized_value: SerializedData = value.serialized_data + # dbg(serialized_value.model_dump()) + chunk_id_map = {} for key in serialized_value.get_keys(): @@ -428,13 +430,21 @@ def _persist_value_data(self, value: Value) -> PersistedData: f"Invalid serialized data type: {type(data_model)}. 
Available types: {', '.join(SERIALIZE_TYPES)}" ) + cids = serialized_value.get_cids_for_key(key) + chunk_iterable = zip(cids, chunks) + # chunks_to_persist.update(chunk_iterable) + # print(key) + # print(type(chunks)) + # self._persist_chunks(chunk_iterable) + chunk_ids = [] - for item in zip(serialized_value.get_cids_for_key(key), chunks): + for item in chunk_iterable: cid = item[0] _chunk = item[1] self._persist_chunk(str(cid), _chunk) chunk_ids.append(str(cid)) + chunk_ids = [str(cid) for cid in cids] scids = SerializedChunkIDs( chunk_id_list=chunk_ids, archive_id=self.archive_id, @@ -443,6 +453,9 @@ def _persist_value_data(self, value: Value) -> PersistedData: scids._data_registry = self.kiara_context.data_registry chunk_id_map[key] = scids + print("chunks_to_persist") + # print(chunks_to_persist) + pers_value = PersistedData( archive_id=self.archive_id, chunk_id_map=chunk_id_map, diff --git a/src/kiara/registries/data/data_store/filesystem_store.py b/src/kiara/registries/data/data_store/filesystem_store.py index 7d091a28b..980725ac6 100644 --- a/src/kiara/registries/data/data_store/filesystem_store.py +++ b/src/kiara/registries/data/data_store/filesystem_store.py @@ -9,7 +9,7 @@ from enum import Enum from io import BytesIO from pathlib import Path -from typing import TYPE_CHECKING, Any, Iterable, Mapping, Set, Union +from typing import TYPE_CHECKING, Any, Iterable, Mapping, Set, Tuple, Union import orjson import structlog @@ -58,20 +58,47 @@ class FileSystemDataArchive(DataArchive): def __init__( self, - archive_id: uuid.UUID, - config: FileSystemArchiveConfig, + archive_alias: str, + archive_config: FileSystemArchiveConfig, force_read_only: bool = False, ): - DataArchive.__init__( - self, archive_id=archive_id, config=config, force_read_only=force_read_only + super().__init__( + archive_alias=archive_alias, + archive_config=archive_config, + force_read_only=force_read_only, ) self._base_path: Union[Path, None] = None self._hashfs_path: Union[Path, None] = 
None self._hashfs: Union[HashFS, None] = None + self._archive_metadata: Union[Mapping[str, Any], None] = None - # def get_job_archive_id(self) -> uuid.UUID: - # return self._kiara.id + def _retrieve_archive_metadata(self) -> Mapping[str, Any]: + + if self._archive_metadata is not None: + return self._archive_metadata + + if not self.archive_metadata_path.is_file(): + _archive_metadata = {} + else: + _archive_metadata = orjson.loads(self.archive_metadata_path.read_bytes()) + + archive_id = _archive_metadata.get("archive_id", None) + if not archive_id: + try: + _archive_id = uuid.UUID(self.data_store_path.name) + _archive_metadata["archive_id"] = _archive_id + except Exception: + raise Exception( + f"Could not retrieve archive id for alias archive '{self.archive_alias}'." + ) + + self._archive_metadata = _archive_metadata + return self._archive_metadata + + @property + def archive_metadata_path(self) -> Path: + return self.data_store_path / "store_metadata.json" def get_archive_details(self) -> ArchiveDetails: @@ -402,6 +429,10 @@ def _persist_destiny_backlinks(self, value: Value): fix_windows_symlink(value_file, destiny_file) + def _persist_chunks(self, *chunks: Tuple[str, Union[str, BytesIO]]): + + raise NotImplementedError() + def _persist_chunk(self, chunk_id: str, chunk: Union[str, BytesIO]): addr: HashAddress = self.hashfs.put_with_precomputed_hash(chunk, chunk_id) diff --git a/src/kiara/registries/data/data_store/sqlite_store.py b/src/kiara/registries/data/data_store/sqlite_store.py index d17c38aed..baae981f8 100644 --- a/src/kiara/registries/data/data_store/sqlite_store.py +++ b/src/kiara/registries/data/data_store/sqlite_store.py @@ -3,7 +3,7 @@ import uuid from io import BytesIO from pathlib import Path -from typing import Any, Iterable, Mapping, Set, Union +from typing import Any, Iterable, Iterator, Mapping, Set, Tuple, Union from orjson import orjson from sqlalchemy import Engine, create_engine, text @@ -81,15 +81,23 @@ def __init__( 
self._value_id_cache: Union[Iterable[uuid.UUID], None] = None # self._lock: bool = True - def _retrieve_archive_id(self) -> uuid.UUID: - sql = text("SELECT value FROM archive_metadata WHERE key='archive_id'") + def _retrieve_archive_metadata(self) -> Mapping[str, Any]: + + sql = text("SELECT key, value FROM archive_metadata") with self.sqlite_engine.connect() as connection: result = connection.execute(sql) - row = result.fetchone() - if row is None: - raise Exception("No archive ID found in metadata") - return uuid.UUID(row[0]) + return {row[0]: row[1] for row in result} + + # def _retrieve_archive_id(self) -> uuid.UUID: + # sql = text("SELECT value FROM archive_metadata WHERE key='archive_id'") + # + # with self.sqlite_engine.connect() as connection: + # result = connection.execute(sql) + # row = result.fetchone() + # if row is None: + # raise Exception("No archive ID found in metadata") + # return uuid.UUID(row[0]) @property def sqlite_path(self): @@ -363,8 +371,15 @@ def _persist_environment_details( # # raise NotImplementedError() + def _persist_chunks(self, chunks: Iterator[Tuple[str, Union[str, BytesIO]]]): + + for chunk_id, chunk in chunks: + self._persist_chunk(str(chunk_id), chunk) + def _persist_chunk(self, chunk_id: str, chunk: Union[str, BytesIO]): + # print(f"sqlite: persisting chunk {chunk_id}") + sql = text( "SELECT EXISTS(SELECT 1 FROM values_data WHERE chunk_id = :chunk_id)" ) diff --git a/src/kiara/registries/jobs/job_store/filesystem_store.py b/src/kiara/registries/jobs/job_store/filesystem_store.py index 09a5bb92d..6d8fef631 100644 --- a/src/kiara/registries/jobs/job_store/filesystem_store.py +++ b/src/kiara/registries/jobs/job_store/filesystem_store.py @@ -8,7 +8,7 @@ import shutil import uuid from pathlib import Path -from typing import Any, Dict, Iterable, Union +from typing import Any, Dict, Iterable, Mapping, Union import orjson import structlog @@ -30,9 +30,18 @@ class FileSystemJobArchive(JobArchive): def supported_item_types(cls) -> 
Iterable[str]: return ["job_record"] - def __init__(self, archive_id: uuid.UUID, config: FileSystemArchiveConfig): - - super().__init__(archive_id=archive_id, config=config) + def __init__( + self, + archive_alias: str, + archive_config: FileSystemArchiveConfig, + force_read_only: bool = False, + ): + + super().__init__( + archive_alias=archive_alias, + archive_config=archive_config, + force_read_only=force_read_only, + ) self._base_path: Union[Path, None] = None def get_archive_details(self) -> ArchiveDetails: @@ -42,6 +51,33 @@ def get_archive_details(self) -> ArchiveDetails: ) return ArchiveDetails(size=size) + def _retrieve_archive_metadata(self) -> Mapping[str, Any]: + + if self._archive_metadata is not None: + return self._archive_metadata + + if not self.archive_metadata_path.is_file(): + _archive_metadata = {} + else: + _archive_metadata = orjson.loads(self.archive_metadata_path.read_bytes()) + + archive_id = _archive_metadata.get("archive_id", None) + if not archive_id: + try: + _archive_id = uuid.UUID(self.job_store_path.name) + _archive_metadata["archive_id"] = _archive_id + except Exception: + raise Exception( + f"Could not retrieve archive id for alias archive '{self.archive_alias}'." 
+ ) + + self._archive_metadata = _archive_metadata + return self._archive_metadata + + @property + def archive_metadata_path(self) -> Path: + return self.job_store_path / "store_metadata.json" + @property def job_store_path(self) -> Path: diff --git a/src/kiara/registries/jobs/job_store/sqlite_store.py b/src/kiara/registries/jobs/job_store/sqlite_store.py index 867cbf6eb..156fbab0d 100644 --- a/src/kiara/registries/jobs/job_store/sqlite_store.py +++ b/src/kiara/registries/jobs/job_store/sqlite_store.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import uuid from pathlib import Path from typing import Any, Iterable, Mapping, Union @@ -56,15 +55,23 @@ def __init__(self, archive_alias: str, archive_config: SqliteArchiveConfig): self._cached_engine: Union[Engine, None] = None # self._lock: bool = True - def _retrieve_archive_id(self) -> uuid.UUID: - sql = text("SELECT value FROM archive_metadata WHERE key='archive_id'") + # def _retrieve_archive_id(self) -> uuid.UUID: + # sql = text("SELECT value FROM archive_metadata WHERE key='archive_id'") + # + # with self.sqlite_engine.connect() as connection: + # result = connection.execute(sql) + # row = result.fetchone() + # if row is None: + # raise Exception("No archive ID found in metadata") + # return uuid.UUID(row[0]) + + def _retrieve_archive_metadata(self) -> Mapping[str, Any]: + + sql = text("SELECT key, value FROM archive_metadata") with self.sqlite_engine.connect() as connection: result = connection.execute(sql) - row = result.fetchone() - if row is None: - raise Exception("No archive ID found in metadata") - return uuid.UUID(row[0]) + return {row[0]: row[1] for row in result} @property def sqlite_path(self): diff --git a/src/kiara/registries/workflows/archives.py b/src/kiara/registries/workflows/archives.py index 731ef38c0..e0e3f316b 100644 --- a/src/kiara/registries/workflows/archives.py +++ b/src/kiara/registries/workflows/archives.py @@ -2,7 +2,7 @@ import shutil import uuid from pathlib import Path -from typing 
import Dict, Iterable, Mapping, Union +from typing import Any, Dict, Iterable, Mapping, Union import orjson @@ -18,16 +18,48 @@ class FileSystemWorkflowArchive(WorkflowArchive): _archive_type_name = "filesystem_workflow_archive" _config_cls = FileSystemArchiveConfig # type: ignore - def __init__(self, archive_alias: str, archive_config: ARCHIVE_CONFIG_CLS): - - super().__init__(archive_alias=archive_alias, archive_config=archive_config) + def __init__( + self, + archive_alias: str, + archive_config: ARCHIVE_CONFIG_CLS, + force_read_only: bool = False, + ): + + super().__init__( + archive_alias=archive_alias, + archive_config=archive_config, + force_read_only=force_read_only, + ) self._base_path: Union[Path, None] = None self.alias_store_path.mkdir(parents=True, exist_ok=True) - def _retrieve_archive_id(self) -> uuid.UUID: + def _retrieve_archive_metadata(self) -> Mapping[str, Any]: + + if self._archive_metadata is not None: + return self._archive_metadata - return uuid.UUID(self.workflow_store_path.name) + if not self.archive_metadata_path.is_file(): + _archive_metadata = {} + else: + _archive_metadata = orjson.loads(self.archive_metadata_path.read_bytes()) + + archive_id = _archive_metadata.get("archive_id", None) + if not archive_id: + try: + _archive_id = uuid.UUID(self.workflow_store_path.name) + _archive_metadata["archive_id"] = _archive_id + except Exception: + raise Exception( + f"Could not retrieve archive id for alias archive '{self.archive_alias}'." + ) + + self._archive_metadata = _archive_metadata + return self._archive_metadata + + @property + def archive_metadata_path(self) -> Path: + return self.workflow_store_path / "store_metadata.json" @property def workflow_store_path(self) -> Path: