From d72bc17e8e4110355f7f1115e4c7f2d183fce942 Mon Sep 17 00:00:00 2001 From: Markus Binsteiner Date: Tue, 5 Mar 2024 12:26:38 +0100 Subject: [PATCH] refactor: rename model_type_name -> model_type_id --- src/kiara/context/__init__.py | 8 +- src/kiara/context/config.py | 2 +- .../interfaces/python_api/models/archive.py | 2 +- src/kiara/models/__init__.py | 23 ++++-- src/kiara/models/metadata/__init__.py | 18 +++++ src/kiara/registries/metadata/__init__.py | 15 +++- .../metadata/metadata_store/__init__.py | 75 ++++++++++++++++++- .../metadata/metadata_store/sqlite_store.py | 26 ++++++- 8 files changed, 146 insertions(+), 23 deletions(-) create mode 100644 src/kiara/models/metadata/__init__.py diff --git a/src/kiara/context/__init__.py b/src/kiara/context/__init__.py index 306b15dee..f23a2a837 100644 --- a/src/kiara/context/__init__.py +++ b/src/kiara/context/__init__.py @@ -130,6 +130,8 @@ def __init__( self._config: KiaraContextConfig = config self._runtime_config: KiaraRuntimeConfig = runtime_config + self._env_mgmt: EnvironmentRegistry = EnvironmentRegistry.instance() + self._event_registry: EventRegistry = EventRegistry(kiara=self) self._type_registry: TypeRegistry = TypeRegistry(self) self._data_registry: DataRegistry = DataRegistry(kiara=self) @@ -147,8 +149,6 @@ def __init__( self._render_registry = RenderRegistry(kiara=self) - self._env_mgmt: Union[EnvironmentRegistry, None] = None - metadata_augmenter = CreateMetadataDestinies(kiara=self) self._event_registry.add_listener( metadata_augmenter, *metadata_augmenter.supported_event_types() @@ -245,10 +245,6 @@ def context_info(self) -> "KiaraContextInfo": @property def environment_registry(self) -> EnvironmentRegistry: - if self._env_mgmt is not None: - return self._env_mgmt - - self._env_mgmt = EnvironmentRegistry.instance() return self._env_mgmt @property diff --git a/src/kiara/context/config.py b/src/kiara/context/config.py index 1cb040257..f5f8382ec 100644 --- a/src/kiara/context/config.py +++ b/src/kiara/context/config.py @@ -389,7 +389,7 @@ def create_default_store_config( return data_store -DEFAULT_STORE_TYPE = "auto" +DEFAULT_STORE_TYPE: Literal["auto"] = "auto" class KiaraConfig(BaseSettings): diff --git a/src/kiara/interfaces/python_api/models/archive.py b/src/kiara/interfaces/python_api/models/archive.py index 8cfd56a4a..d7cc32fe9 100644 --- a/src/kiara/interfaces/python_api/models/archive.py +++ b/src/kiara/interfaces/python_api/models/archive.py @@ -220,7 +220,7 @@ def metadata_archive(self) -> "MetadataArchive": from kiara.utils.stores import create_new_archive metadata_archive: MetadataArchive = create_new_archive( # type: ignore - archive_name=self._archive_name, + archive_name=self.archive_name, store_base_path=self.archive_base_path, store_type="sqlite_metadata_store", file_name=self.archive_file_name, diff --git a/src/kiara/models/__init__.py b/src/kiara/models/__init__.py index 80555835d..79750e968 100644 --- a/src/kiara/models/__init__.py +++ b/src/kiara/models/__init__.py @@ -10,7 +10,6 @@ import networkx as nx from dag_cbor import IPLDKind -from deepdiff import DeepHash from multiformats import CID from pydantic import ConfigDict from pydantic.fields import PrivateAttr @@ -30,7 +29,7 @@ from kiara.registries.templates import TemplateRegistry from kiara.utils.class_loading import _default_id_func from kiara.utils.develop import log_dev_message -from kiara.utils.hashing import KIARA_HASH_FUNCTION, compute_cid +from kiara.utils.hashing import compute_cid from kiara.utils.json import orjson_dumps from kiara.utils.models import ( assemble_subcomponent_graph, @@ -58,13 +57,23 @@ class KiaraModel(ABC, BaseModel, JupyterMixin): # return to_camel_case(cls._kiara_model_name) @classmethod - def get_schema_hash(cls) -> int: + def get_schema_cid(cls) -> CID: if cls._schema_hash_cache is not None: return cls._schema_hash_cache - obj = cls.model_json_schema() - h = DeepHash(obj, hasher=KIARA_HASH_FUNCTION) - cls._schema_hash_cache = h[obj] + model_schema = cls.model_json_schema() + try: + _, cid = compute_cid(data=model_schema) + except Exception as e: + from kiara.utils.output import extract_renderable + + msg = "Failed to compute cid for model schema instance." + item = extract_renderable(model_schema) + renderable = Group(msg, item, extract_renderable(e)) + log_dev_message(renderable, title="cid computation error") + raise e + + cls._schema_hash_cache = cid return cls._schema_hash_cache _graph_cache: Union[nx.DiGraph, None] = PrivateAttr(default=None) @@ -72,7 +81,7 @@ def get_schema_hash(cls) -> int: _dynamic_subcomponents: Dict[str, "KiaraModel"] = PrivateAttr(default_factory=dict) _id_cache: Union[str, None] = PrivateAttr(default=None) _category_id_cache: Union[str, None] = PrivateAttr(default=None) - _schema_hash_cache: ClassVar[Union[None, int]] = None + _schema_hash_cache: ClassVar[Union[None, CID]] = None _cid_cache: Union[CID, None] = PrivateAttr(default=None) _dag_cache: Union[bytes, None] = PrivateAttr(default=None) _size_cache: Union[int, None] = PrivateAttr(default=None) diff --git a/src/kiara/models/metadata/__init__.py b/src/kiara/models/metadata/__init__.py new file mode 100644 index 000000000..e5b9ac9f2 --- /dev/null +++ b/src/kiara/models/metadata/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +from typing import Any, ClassVar + +from pydantic import Field + +from kiara.models import KiaraModel + + +class KiaraMetadata(KiaraModel): + def _retrieve_data_to_hash(self) -> Any: + return {"metadata": self.model_dump(), "schema": self.schema_json()} + + +class CommentMetadata(KiaraMetadata): + + _kiara_model_id: ClassVar = "instance.kiara_metadata.comment" + + comment: str = Field(description="A note/comment.") diff --git a/src/kiara/registries/metadata/__init__.py b/src/kiara/registries/metadata/__init__.py index c4241f815..2a99ef59a 100644 --- a/src/kiara/registries/metadata/__init__.py +++ b/src/kiara/registries/metadata/__init__.py @@ -5,8 +5,8 @@ from pydantic import Field from kiara.defaults import DEFAULT_METADATA_STORE_MARKER, DEFAULT_STORE_MARKER -from kiara.models import KiaraModel from kiara.models.events import RegistryEvent +from kiara.models.metadata import KiaraMetadata from kiara.registries.metadata.metadata_store import MetadataArchive, MetadataStore if TYPE_CHECKING: @@ -142,5 +142,14 @@ def get_archive( f"Can't retrieve archive with id '{archive_id_or_alias}': no archive with that id registered." ) - def register_metadata_item(self, key: str, item: KiaraModel): - pass + def register_metadata_item( + self, + key: str, + item: KiaraMetadata, + metadata_store: Union[str, uuid.UUID, None] = None, + ): + + store: MetadataStore = self.get_archive(archive_id_or_alias=metadata_store) # type: ignore + + store.store_metadata_item(key=key, item=item) + # print(f"Result: {result}") diff --git a/src/kiara/registries/metadata/metadata_store/__init__.py b/src/kiara/registries/metadata/metadata_store/__init__.py index d5e894292..d97387f07 100644 --- a/src/kiara/registries/metadata/metadata_store/__init__.py +++ b/src/kiara/registries/metadata/metadata_store/__init__.py @@ -1,7 +1,10 @@ # -*- coding: utf-8 -*- import abc -from typing import Any, Generic, Iterable, Union +import json +import uuid +from typing import Any, Dict, Generic, Iterable, Mapping, Union +from kiara.models.metadata import KiaraMetadata from kiara.registries import ARCHIVE_CONFIG_CLS, BaseArchive @@ -38,10 +41,78 @@ def retrieve_metadata_value( class MetadataStore(MetadataArchive): + def __init__( + self, + archive_name: str, + archive_config: ARCHIVE_CONFIG_CLS, + force_read_only: bool = False, + ): + + super().__init__( + archive_name=archive_name, + archive_config=archive_config, + force_read_only=force_read_only, + ) + self._schema_stored_cache: Dict[str, Any] = {} + @classmethod def _is_writeable(cls) -> bool: return True @abc.abstractmethod - def store_metadata(self, key: str, value: Any): + def _store_metadata_schema( + self, model_schema_hash: str, model_type_id: str, model_schema: str + ): + """Store the metadata schema for the specified model.""" + + def store_metadata_item( + self, + key: str, + item: KiaraMetadata, + reference_item: Any = None, + store: Union[str, uuid.UUID, None] = None, + ): + + if reference_item: + raise NotImplementedError( + "Cannot store metadata item with reference item, not implemented yet." + ) + + if store: + raise NotImplementedError( + "Cannot store metadata item with store, not implemented yet." + ) + + # TODO: check if already stored + model_type = item.model_type_id + model_schema_hash = str(item.get_schema_cid()) + model_item_schema = item.model_json_schema() + model_item_schema_str = json.dumps(model_item_schema) + + self._store_metadata_schema( + model_schema_hash=model_schema_hash, + model_type_id=model_type, + model_schema=model_item_schema_str, + ) + + data = item.model_dump() + data_hash = str(item.instance_cid) + + self._store_metadata_item( + key=key, + value=data, + value_hash=data_hash, + model_type_id=model_type, + model_schema_hash=model_schema_hash, + ) + + @abc.abstractmethod + def _store_metadata_item( + self, + key: str, + value: Mapping[str, Any], + value_hash: str, + model_type_id: str, + model_schema_hash: str, + ): pass diff --git a/src/kiara/registries/metadata/metadata_store/sqlite_store.py b/src/kiara/registries/metadata/metadata_store/sqlite_store.py index 905944220..3cbafeebe 100644 --- a/src/kiara/registries/metadata/metadata_store/sqlite_store.py +++ b/src/kiara/registries/metadata/metadata_store/sqlite_store.py @@ -109,11 +109,18 @@ def sqlite_engine(self) -> "Engine": self._cached_engine = create_engine(self.db_url, future=True) create_table_sql = """ +CREATE TABLE IF NOT EXISTS metadata_schemas ( + model_schema_hash TEXT PRIMARY KEY, + model_type_id TEXT NOT NULL, + model_schema TEXT NOT NULL +); CREATE TABLE IF NOT EXISTS metadata ( metadata_item_id TEXT PRIMARY KEY, metadata_key TEXT NOT NULL, - reference_item_type TEXT NOT NULL, - reference_item_id TEXT NOT NULL, + model_type_id TEXT NOT NULL, + model_schema_hash TEXT NOT NULL, + reference_item_type TEXT, + reference_item_id TEXT, metadata_value TEXT NOT NULL ); """ @@ -158,6 +165,19 @@ def _load_archive_config( # config = SqliteArchiveConfig(sqlite_db_path=store_uri) return {"sqlite_db_path": archive_uri} - def store_metadata(self, key: str, value: Any): + def _store_metadata_schema( + self, model_schema_hash: str, model_type_id: str, model_schema: str + ): + + raise NotImplementedError() + + def _store_metadata_item( + self, + key: str, + value: Mapping[str, Any], + value_hash: str, + model_type_id: str, + model_schema_hash: str, + ): raise NotImplementedError()