refactor: sanitize archive table name
makkus committed Apr 9, 2024
1 parent b20433e commit 5ebfc9f
Showing 12 changed files with 240 additions and 214 deletions (five of the twelve diffs are reproduced below).
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -434,7 +434,7 @@ select = [
"PIE",
]
#select = ["E", "F", "RUF100", "W", "I001"]
ignore = ["E501", "S101", "SIM118", "SIM108", "PLR2004", "PLR0913", "S110", "PIE810", "PLR0911", "PLR0915", "PLR0912", "D", "D401", "PLW0603", "PLR5501", "PLW2901", "S603"]
ignore = ["E501", "S101", "SIM118", "SIM108", "PLR2004", "PLR0913", "S110", "PIE810", "PLR0911", "PLR0915", "PLR0912", "D", "D401", "PLW0603", "PLR5501", "PLW2901", "S603", "S608"]
per-file-ignores = { }


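
For context: S608 is Ruff's flake8-bandit rule flagging SQL statements assembled via string formatting. This commit deliberately builds its queries with f-strings that interpolate trusted table-name constants, so the rule would fire on every statement; hence the blanket ignore. A minimal sketch of the distinction, in Python (the constant value mirrors the defaults added below):

from sqlalchemy import text

TABLE_NAME_ALIASES = "aliases"  # trusted module-level constant, not user input

# S608 flags any f-string-built SQL, even though only the constant is interpolated:
sql = text(f"SELECT value_id FROM {TABLE_NAME_ALIASES} WHERE alias = :alias")

# Caller-supplied data still travels as a bound parameter, never via the f-string:
params = {"alias": "some_alias"}
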
42 changes: 42 additions & 0 deletions src/kiara/defaults.py
@@ -310,3 +310,45 @@ class CHUNK_COMPRESSION_TYPE(Enum):

ARCHIVE_NAME_MARKER = "archive_name"
DATA_ARCHIVE_DEFAULT_VALUE_MARKER = "default_value"
+TABLE_NAME_ARCHIVE_METADATA = "archive_metadata"
+TABLE_NAME_DATA_METADATA = "data_value_metadata"
+TABLE_NAME_DATA_SERIALIZATION_METADATA = "data_serialization_metadata"
+TABLE_NAME_DATA_CHUNKS = "data_chunks"
+TABLE_NAME_DATA_PEDIGREE = "data_value_pedigree"
+TABLE_NAME_DATA_DESTINIES = "data_value_destiny"
+REQUIRED_TABLES_DATA_ARCHIVE = {
+    TABLE_NAME_ARCHIVE_METADATA,
+    TABLE_NAME_DATA_METADATA,
+    TABLE_NAME_DATA_SERIALIZATION_METADATA,
+    TABLE_NAME_DATA_CHUNKS,
+    TABLE_NAME_DATA_PEDIGREE,
+    TABLE_NAME_DATA_DESTINIES,
+}
+
+TABLE_NAME_ALIASES = "aliases"
+REQUIRED_TABLES_ALIAS_ARCHIVE = {
+    TABLE_NAME_ARCHIVE_METADATA,
+    TABLE_NAME_ALIASES,
+}
+
+TABLE_NAME_JOB_RECORDS = "job_records"
+REQUIRED_TABLES_JOB_ARCHIVE = {
+    TABLE_NAME_ARCHIVE_METADATA,
+    TABLE_NAME_JOB_RECORDS,
+}
+
+TABLE_NAME_METADATA = "metadata"
+TABLE_NAME_METADATA_SCHEMAS = "metadata_schemas"
+TABLE_NAME_METADATA_REFERENCES = "metadata_references"
+REQUIRED_TABLES_METADATA = {
+    TABLE_NAME_ARCHIVE_METADATA,
+    TABLE_NAME_METADATA,
+    TABLE_NAME_METADATA_SCHEMAS,
+    TABLE_NAME_METADATA_REFERENCES,
+}
+
+
+ALL_REQUIRED_TABLES = set(REQUIRED_TABLES_DATA_ARCHIVE)
+ALL_REQUIRED_TABLES.update(REQUIRED_TABLES_ALIAS_ARCHIVE)
+ALL_REQUIRED_TABLES.update(REQUIRED_TABLES_JOB_ARCHIVE)
+ALL_REQUIRED_TABLES.update(REQUIRED_TABLES_METADATA)
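
The REQUIRED_TABLES_* sets let each store recognize its archive type by inspecting an existing SQLite file, mirroring the _load_archive_config checks further down in this commit. A minimal sketch of that check (the helper name and db_path are hypothetical):

import sqlite3

from kiara.defaults import REQUIRED_TABLES_ALIAS_ARCHIVE

def looks_like_alias_archive(db_path: str) -> bool:
    # List all table names in the SQLite file, then require the alias-archive set.
    con = sqlite3.connect(db_path)
    try:
        cursor = con.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = {row[0] for row in cursor.fetchall()}
    finally:
        con.close()
    return REQUIRED_TABLES_ALIAS_ARCHIVE.issubset(tables)
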
29 changes: 15 additions & 14 deletions src/kiara/registries/aliases/sqlite_store.py
@@ -6,6 +6,11 @@
from sqlalchemy import text
from sqlalchemy.engine import Engine

+from kiara.defaults import (
+    REQUIRED_TABLES_ALIAS_ARCHIVE,
+    TABLE_NAME_ALIASES,
+    TABLE_NAME_ARCHIVE_METADATA,
+)
from kiara.registries import SqliteArchiveConfig
from kiara.registries.aliases import AliasArchive, AliasStore
from kiara.utils.dates import get_current_time_incl_timezone
@@ -36,9 +41,7 @@ def _load_archive_config(
tables = {x[0] for x in cursor.fetchall()}
con.close()

-required_tables = {
-    "aliases",
-}
+required_tables = REQUIRED_TABLES_ALIAS_ARCHIVE

if not required_tables.issubset(tables):
return None
@@ -66,7 +69,7 @@ def __init__(

def _retrieve_archive_metadata(self) -> Mapping[str, Any]:

sql = text("SELECT key, value FROM archive_metadata")
sql = text(f"SELECT key, value FROM {TABLE_NAME_ARCHIVE_METADATA}")

with self.sqlite_engine.connect() as connection:
result = connection.execute(sql)
@@ -104,8 +107,8 @@ def sqlite_engine(self) -> "Engine":
use_wal_mode=self._use_wal_mode,
)

create_table_sql = """
CREATE TABLE IF NOT EXISTS aliases (
create_table_sql = f"""
CREATE TABLE IF NOT EXISTS {TABLE_NAME_ALIASES} (
alias TEXT PRIMARY KEY,
value_id TEXT NOT NULL,
alias_created TEXT NOT NULL
@@ -122,7 +125,7 @@ def sqlite_engine(self) -> "Engine":

def find_value_id_for_alias(self, alias: str) -> Union[uuid.UUID, None]:

sql = text("SELECT value_id FROM aliases WHERE alias = :alias")
sql = text(f"SELECT value_id FROM {TABLE_NAME_ALIASES} WHERE alias = :alias")
with self.sqlite_engine.connect() as connection:
result = connection.execute(sql, {"alias": alias})
row = result.fetchone()
@@ -132,14 +135,14 @@ def find_value_id_for_alias(self, alias: str) -> Union[uuid.UUID, None]:

def find_aliases_for_value_id(self, value_id: uuid.UUID) -> Union[Set[str], None]:

sql = text("SELECT alias FROM aliases WHERE value_id = :value_id")
sql = text(f"SELECT alias FROM {TABLE_NAME_ALIASES} WHERE value_id = :value_id")
with self.sqlite_engine.connect() as connection:
result = connection.execute(sql, {"value_id": str(value_id)})
return {row[0] for row in result}

def retrieve_all_aliases(self) -> Union[Mapping[str, uuid.UUID], None]:

sql = text("SELECT alias, value_id FROM aliases")
sql = text(f"SELECT alias, value_id FROM {TABLE_NAME_ALIASES}")
with self.sqlite_engine.connect() as connection:
result = connection.execute(sql)
return {row[0]: uuid.UUID(row[1]) for row in result}
@@ -173,9 +176,7 @@ def _load_archive_config(
tables = {x[0] for x in cursor.fetchall()}
con.close()

-required_tables = {
-    "aliases",
-}
+required_tables = REQUIRED_TABLES_ALIAS_ARCHIVE

if not required_tables.issubset(tables):
return None
@@ -187,7 +188,7 @@ def _set_archive_metadata_value(self, key: str, value: Any):
"""Set custom metadata for the archive."""

sql = text(
"INSERT OR REPLACE INTO archive_metadata (key, value) VALUES (:key, :value)"
f"INSERT OR REPLACE INTO {TABLE_NAME_ARCHIVE_METADATA} (key, value) VALUES (:key, :value)"
)
with self.sqlite_engine.connect() as conn:
params = {"key": key, "value": value}
@@ -199,7 +200,7 @@ def register_aliases(self, value_id: uuid.UUID, *aliases: str):
alias_created = get_current_time_incl_timezone().isoformat()

sql = text(
"INSERT OR REPLACE INTO aliases (alias, value_id, alias_created) VALUES (:alias, :value_id, :alias_created)"
f"INSERT OR REPLACE INTO {TABLE_NAME_ALIASES} (alias, value_id, alias_created) VALUES (:alias, :value_id, :alias_created)"
)

with self.sqlite_engine.connect() as connection:
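
The net effect in this file: table names are interpolated only from the kiara.defaults constants, while anything caller-supplied is passed as a bound parameter. A sketch of the resulting pattern (the database path is hypothetical; the schema mirrors the store's CREATE TABLE above):

from sqlalchemy import create_engine, text

from kiara.defaults import TABLE_NAME_ALIASES

engine = create_engine("sqlite:///example_aliases.db")  # hypothetical path

with engine.connect() as connection:
    # Table name comes from a trusted constant, never from user input.
    connection.execute(text(
        f"CREATE TABLE IF NOT EXISTS {TABLE_NAME_ALIASES} "
        "(alias TEXT PRIMARY KEY, value_id TEXT NOT NULL, alias_created TEXT NOT NULL)"
    ))
    # The caller-supplied alias goes through a bound parameter.
    sql = text(f"SELECT value_id FROM {TABLE_NAME_ALIASES} WHERE alias = :alias")
    row = connection.execute(sql, {"alias": "my_alias"}).fetchone()
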
50 changes: 25 additions & 25 deletions src/kiara/registries/data/data_store/__init__.py
@@ -198,31 +198,31 @@ def has_value(self, value_id: uuid.UUID) -> bool:
return False
return value_id in all_value_ids

-def retrieve_environment_details(
-    self, env_type: str, env_hash: str
-) -> Mapping[str, Any]:
-    """
-    Retrieve the environment details with the specified type and hash.
-    The environment is stored by the data store as a dictionary, including it's schema, not as the actual Python model.
-    This is to make sure it can still be loaded later on, in case the Python model has changed in later versions.
-    """
-    cached = self._env_cache.get(env_type, {}).get(env_hash, None)
-    if cached is not None:
-        return cached
-
-    env = self._retrieve_environment_details(env_type=env_type, env_hash=env_hash)
-    self._env_cache.setdefault(env_type, {})[env_hash] = env
-    return env
-
-@abc.abstractmethod
-def _retrieve_environment_details(
-    self, env_type: str, env_hash: str
-) -> Mapping[str, Any]:
-    """Retrieve the environment details with the specified type and hash.
-    Each store needs to implement this so environemnt details related to a value can be retrieved later on. Since in most cases the environment details will not change, a lookup is more efficient than having to store the full information with each value.
-    """
+# def retrieve_environment_details(
+#     self, env_type: str, env_hash: str
+# ) -> Mapping[str, Any]:
+#     """
+#     Retrieve the environment details with the specified type and hash.
+#
+#     The environment is stored by the data store as a dictionary, including it's schema, not as the actual Python model.
+#     This is to make sure it can still be loaded later on, in case the Python model has changed in later versions.
+#     """
+#     cached = self._env_cache.get(env_type, {}).get(env_hash, None)
+#     if cached is not None:
+#         return cached
+#
+#     env = self._retrieve_environment_details(env_type=env_type, env_hash=env_hash)
+#     self._env_cache.setdefault(env_type, {})[env_hash] = env
+#     return env
+#
+# @abc.abstractmethod
+# def _retrieve_environment_details(
+#     self, env_type: str, env_hash: str
+# ) -> Mapping[str, Any]:
+#     """Retrieve the environment details with the specified type and hash.
+#
+#     Each store needs to implement this so environemnt details related to a value can be retrieved later on. Since in most cases the environment details will not change, a lookup is more efficient than having to store the full information with each value.
+#     """

def find_values(self, matcher: ValueMatcher) -> Iterable[Value]:
raise NotImplementedError()
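
The commented-out pair implemented a read-through cache: a two-level dict keyed by environment type and hash, with the abstract method as the backing lookup. A standalone sketch of that pattern (_lookup is a hypothetical stand-in for the per-store implementation):

from typing import Any, Dict, Mapping

_env_cache: Dict[str, Dict[str, Mapping[str, Any]]] = {}

def _lookup(env_type: str, env_hash: str) -> Mapping[str, Any]:
    # Hypothetical stand-in for the store-specific retrieval.
    return {"environment_type": env_type, "environment_hash": env_hash}

def retrieve_environment_details(env_type: str, env_hash: str) -> Mapping[str, Any]:
    # Return the cached entry if present; otherwise look it up and memoize it.
    cached = _env_cache.get(env_type, {}).get(env_hash, None)
    if cached is not None:
        return cached
    env = _lookup(env_type, env_hash)
    _env_cache.setdefault(env_type, {})[env_hash] = env
    return env
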
28 changes: 14 additions & 14 deletions src/kiara/registries/data/data_store/filesystem_store.py
@@ -182,20 +182,20 @@ def get_path(
result.mkdir(parents=True, exist_ok=True)
return result

-def _retrieve_environment_details(
-    self, env_type: str, env_hash: str
-) -> Mapping[str, Any]:
-
-    base_path = self.get_path(entity_type=EntityType.ENVIRONMENT)
-    env_details_file = base_path / f"{env_type}_{env_hash}.json"
-
-    if not env_details_file.exists():
-        raise Exception(
-            f"Can't load environment details, file does not exist: {env_details_file.as_posix()}"
-        )
-
-    environment: Mapping[str, Any] = orjson.loads(env_details_file.read_text())
-    return environment
+# def _retrieve_environment_details(
+#     self, env_type: str, env_hash: str
+# ) -> Mapping[str, Any]:
+#
+#     base_path = self.get_path(entity_type=EntityType.ENVIRONMENT)
+#     env_details_file = base_path / f"{env_type}_{env_hash}.json"
+#
+#     if not env_details_file.exists():
+#         raise Exception(
+#             f"Can't load environment details, file does not exist: {env_details_file.as_posix()}"
+#         )
+#
+#     environment: Mapping[str, Any] = orjson.loads(env_details_file.read_text())
+#     return environment

def retrieve_all_job_hashes(
self,