refactor: sanitize archive table name
makkus committed Apr 9, 2024
1 parent b20433e commit 5ebfc9f
Showing 12 changed files with 240 additions and 214 deletions (five of the twelve diffs are reproduced below).
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -434,7 +434,7 @@ select = [
"PIE",
]
#select = ["E", "F", "RUF100", "W", "I001"]
ignore = ["E501", "S101", "SIM118", "SIM108", "PLR2004", "PLR0913", "S110", "PIE810", "PLR0911", "PLR0915", "PLR0912", "D", "D401", "PLW0603", "PLR5501", "PLW2901", "S603"]
ignore = ["E501", "S101", "SIM118", "SIM108", "PLR2004", "PLR0913", "S110", "PIE810", "PLR0911", "PLR0915", "PLR0912", "D", "D401", "PLW0603", "PLR5501", "PLW2901", "S603", "S608"]
per-file-ignores = { }


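
For context: S608 is Ruff's flake8-bandit rule flagging SQL statements assembled via string formatting. This commit deliberately builds its queries with f-strings that interpolate trusted table-name constants, so the rule would fire on every statement; hence the blanket ignore. A minimal sketch of the distinction, in Python (the constant value mirrors the defaults added below):

from sqlalchemy import text

TABLE_NAME_ALIASES = "aliases"  # trusted module-level constant, not user input

# S608 flags any f-string-built SQL, even though only the constant is interpolated:
sql = text(f"SELECT value_id FROM {TABLE_NAME_ALIASES} WHERE alias = :alias")

# Caller-supplied data still travels as a bound parameter, never via the f-string:
params = {"alias": "some_alias"}
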
42 changes: 42 additions & 0 deletions src/kiara/defaults.py
@@ -310,3 +310,45 @@ class CHUNK_COMPRESSION_TYPE(Enum):

ARCHIVE_NAME_MARKER = "archive_name"
DATA_ARCHIVE_DEFAULT_VALUE_MARKER = "default_value"
+TABLE_NAME_ARCHIVE_METADATA = "archive_metadata"
+TABLE_NAME_DATA_METADATA = "data_value_metadata"
+TABLE_NAME_DATA_SERIALIZATION_METADATA = "data_serialization_metadata"
+TABLE_NAME_DATA_CHUNKS = "data_chunks"
+TABLE_NAME_DATA_PEDIGREE = "data_value_pedigree"
+TABLE_NAME_DATA_DESTINIES = "data_value_destiny"
+REQUIRED_TABLES_DATA_ARCHIVE = {
+    TABLE_NAME_ARCHIVE_METADATA,
+    TABLE_NAME_DATA_METADATA,
+    TABLE_NAME_DATA_SERIALIZATION_METADATA,
+    TABLE_NAME_DATA_CHUNKS,
+    TABLE_NAME_DATA_PEDIGREE,
+    TABLE_NAME_DATA_DESTINIES,
+}
+
+TABLE_NAME_ALIASES = "aliases"
+REQUIRED_TABLES_ALIAS_ARCHIVE = {
+    TABLE_NAME_ARCHIVE_METADATA,
+    TABLE_NAME_ALIASES,
+}
+
+TABLE_NAME_JOB_RECORDS = "job_records"
+REQUIRED_TABLES_JOB_ARCHIVE = {
+    TABLE_NAME_ARCHIVE_METADATA,
+    TABLE_NAME_JOB_RECORDS,
+}
+
+TABLE_NAME_METADATA = "metadata"
+TABLE_NAME_METADATA_SCHEMAS = "metadata_schemas"
+TABLE_NAME_METADATA_REFERENCES = "metadata_references"
+REQUIRED_TABLES_METADATA = {
+    TABLE_NAME_ARCHIVE_METADATA,
+    TABLE_NAME_METADATA,
+    TABLE_NAME_METADATA_SCHEMAS,
+    TABLE_NAME_METADATA_REFERENCES,
+}
+
+
+ALL_REQUIRED_TABLES = set(REQUIRED_TABLES_DATA_ARCHIVE)
+ALL_REQUIRED_TABLES.update(REQUIRED_TABLES_ALIAS_ARCHIVE)
+ALL_REQUIRED_TABLES.update(REQUIRED_TABLES_JOB_ARCHIVE)
+ALL_REQUIRED_TABLES.update(REQUIRED_TABLES_METADATA)
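
The REQUIRED_TABLES_* sets let each store recognize its archive type by inspecting an existing SQLite file, mirroring the _load_archive_config checks further down in this commit. A minimal sketch of that check (the helper name and db_path are hypothetical):

import sqlite3

from kiara.defaults import REQUIRED_TABLES_ALIAS_ARCHIVE

def looks_like_alias_archive(db_path: str) -> bool:
    # List all table names in the SQLite file, then require the alias-archive set.
    con = sqlite3.connect(db_path)
    try:
        cursor = con.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = {row[0] for row in cursor.fetchall()}
    finally:
        con.close()
    return REQUIRED_TABLES_ALIAS_ARCHIVE.issubset(tables)
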
29 changes: 15 additions & 14 deletions src/kiara/registries/aliases/sqlite_store.py
@@ -6,6 +6,11 @@
from sqlalchemy import text
from sqlalchemy.engine import Engine

+from kiara.defaults import (
+    REQUIRED_TABLES_ALIAS_ARCHIVE,
+    TABLE_NAME_ALIASES,
+    TABLE_NAME_ARCHIVE_METADATA,
+)
from kiara.registries import SqliteArchiveConfig
from kiara.registries.aliases import AliasArchive, AliasStore
from kiara.utils.dates import get_current_time_incl_timezone
@@ -36,9 +41,7 @@ def _load_archive_config(
tables = {x[0] for x in cursor.fetchall()}
con.close()

-required_tables = {
-    "aliases",
-}
+required_tables = REQUIRED_TABLES_ALIAS_ARCHIVE

if not required_tables.issubset(tables):
return None
@@ -66,7 +69,7 @@ def __init__(

def _retrieve_archive_metadata(self) -> Mapping[str, Any]:

sql = text("SELECT key, value FROM archive_metadata")
sql = text(f"SELECT key, value FROM {TABLE_NAME_ARCHIVE_METADATA}")

with self.sqlite_engine.connect() as connection:
result = connection.execute(sql)
@@ -104,8 +107,8 @@ def sqlite_engine(self) -> "Engine":
use_wal_mode=self._use_wal_mode,
)

create_table_sql = """
CREATE TABLE IF NOT EXISTS aliases (
create_table_sql = f"""
CREATE TABLE IF NOT EXISTS {TABLE_NAME_ALIASES} (
alias TEXT PRIMARY KEY,
value_id TEXT NOT NULL,
alias_created TEXT NOT NULL
@@ -122,7 +125,7 @@ def sqlite_engine(self) -> "Engine":

def find_value_id_for_alias(self, alias: str) -> Union[uuid.UUID, None]:

sql = text("SELECT value_id FROM aliases WHERE alias = :alias")
sql = text(f"SELECT value_id FROM {TABLE_NAME_ALIASES} WHERE alias = :alias")
with self.sqlite_engine.connect() as connection:
result = connection.execute(sql, {"alias": alias})
row = result.fetchone()
@@ -132,14 +135,14 @@ def find_value_id_for_alias(self, alias: str) -> Union[uuid.UUID, None]:

def find_aliases_for_value_id(self, value_id: uuid.UUID) -> Union[Set[str], None]:

sql = text("SELECT alias FROM aliases WHERE value_id = :value_id")
sql = text(f"SELECT alias FROM {TABLE_NAME_ALIASES} WHERE value_id = :value_id")
with self.sqlite_engine.connect() as connection:
result = connection.execute(sql, {"value_id": str(value_id)})
return {row[0] for row in result}

def retrieve_all_aliases(self) -> Union[Mapping[str, uuid.UUID], None]:

sql = text("SELECT alias, value_id FROM aliases")
sql = text(f"SELECT alias, value_id FROM {TABLE_NAME_ALIASES}")
with self.sqlite_engine.connect() as connection:
result = connection.execute(sql)
return {row[0]: uuid.UUID(row[1]) for row in result}
@@ -173,9 +176,7 @@ def _load_archive_config(
tables = {x[0] for x in cursor.fetchall()}
con.close()

-required_tables = {
-    "aliases",
-}
+required_tables = REQUIRED_TABLES_ALIAS_ARCHIVE

if not required_tables.issubset(tables):
return None
@@ -187,7 +188,7 @@ def _set_archive_metadata_value(self, key: str, value: Any):
"""Set custom metadata for the archive."""

sql = text(
"INSERT OR REPLACE INTO archive_metadata (key, value) VALUES (:key, :value)"
f"INSERT OR REPLACE INTO {TABLE_NAME_ARCHIVE_METADATA} (key, value) VALUES (:key, :value)"
)
with self.sqlite_engine.connect() as conn:
params = {"key": key, "value": value}
@@ -199,7 +200,7 @@ def register_aliases(self, value_id: uuid.UUID, *aliases: str):
alias_created = get_current_time_incl_timezone().isoformat()

sql = text(
"INSERT OR REPLACE INTO aliases (alias, value_id, alias_created) VALUES (:alias, :value_id, :alias_created)"
f"INSERT OR REPLACE INTO {TABLE_NAME_ALIASES} (alias, value_id, alias_created) VALUES (:alias, :value_id, :alias_created)"
)

with self.sqlite_engine.connect() as connection:
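
The net effect in this file: table names are interpolated only from the kiara.defaults constants, while anything caller-supplied is passed as a bound parameter. A sketch of the resulting pattern (the database path is hypothetical; the schema mirrors the store's CREATE TABLE above):

from sqlalchemy import create_engine, text

from kiara.defaults import TABLE_NAME_ALIASES

engine = create_engine("sqlite:///example_aliases.db")  # hypothetical path

with engine.connect() as connection:
    # Table name comes from a trusted constant, never from user input.
    connection.execute(text(
        f"CREATE TABLE IF NOT EXISTS {TABLE_NAME_ALIASES} "
        "(alias TEXT PRIMARY KEY, value_id TEXT NOT NULL, alias_created TEXT NOT NULL)"
    ))
    # The caller-supplied alias goes through a bound parameter.
    sql = text(f"SELECT value_id FROM {TABLE_NAME_ALIASES} WHERE alias = :alias")
    row = connection.execute(sql, {"alias": "my_alias"}).fetchone()
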
50 changes: 25 additions & 25 deletions src/kiara/registries/data/data_store/__init__.py
@@ -198,31 +198,31 @@ def has_value(self, value_id: uuid.UUID) -> bool:
return False
return value_id in all_value_ids

-def retrieve_environment_details(
-    self, env_type: str, env_hash: str
-) -> Mapping[str, Any]:
-    """
-    Retrieve the environment details with the specified type and hash.
-    The environment is stored by the data store as a dictionary, including it's schema, not as the actual Python model.
-    This is to make sure it can still be loaded later on, in case the Python model has changed in later versions.
-    """
-    cached = self._env_cache.get(env_type, {}).get(env_hash, None)
-    if cached is not None:
-        return cached
-
-    env = self._retrieve_environment_details(env_type=env_type, env_hash=env_hash)
-    self._env_cache.setdefault(env_type, {})[env_hash] = env
-    return env
-
-@abc.abstractmethod
-def _retrieve_environment_details(
-    self, env_type: str, env_hash: str
-) -> Mapping[str, Any]:
-    """Retrieve the environment details with the specified type and hash.
-    Each store needs to implement this so environemnt details related to a value can be retrieved later on. Since in most cases the environment details will not change, a lookup is more efficient than having to store the full information with each value.
-    """
+# def retrieve_environment_details(
+#     self, env_type: str, env_hash: str
+# ) -> Mapping[str, Any]:
+#     """
+#     Retrieve the environment details with the specified type and hash.
+#
+#     The environment is stored by the data store as a dictionary, including it's schema, not as the actual Python model.
+#     This is to make sure it can still be loaded later on, in case the Python model has changed in later versions.
+#     """
+#     cached = self._env_cache.get(env_type, {}).get(env_hash, None)
+#     if cached is not None:
+#         return cached
+#
+#     env = self._retrieve_environment_details(env_type=env_type, env_hash=env_hash)
+#     self._env_cache.setdefault(env_type, {})[env_hash] = env
+#     return env
+#
+# @abc.abstractmethod
+# def _retrieve_environment_details(
+#     self, env_type: str, env_hash: str
+# ) -> Mapping[str, Any]:
+#     """Retrieve the environment details with the specified type and hash.
+#
+#     Each store needs to implement this so environemnt details related to a value can be retrieved later on. Since in most cases the environment details will not change, a lookup is more efficient than having to store the full information with each value.
+#     """

def find_values(self, matcher: ValueMatcher) -> Iterable[Value]:
raise NotImplementedError()
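
The commented-out pair implemented a read-through cache: a two-level dict keyed by environment type and hash, with the abstract method as the backing lookup. A standalone sketch of that pattern (_lookup is a hypothetical stand-in for the per-store implementation):

from typing import Any, Dict, Mapping

_env_cache: Dict[str, Dict[str, Mapping[str, Any]]] = {}

def _lookup(env_type: str, env_hash: str) -> Mapping[str, Any]:
    # Hypothetical stand-in for the store-specific retrieval.
    return {"environment_type": env_type, "environment_hash": env_hash}

def retrieve_environment_details(env_type: str, env_hash: str) -> Mapping[str, Any]:
    # Return the cached entry if present; otherwise look it up and memoize it.
    cached = _env_cache.get(env_type, {}).get(env_hash, None)
    if cached is not None:
        return cached
    env = _lookup(env_type, env_hash)
    _env_cache.setdefault(env_type, {})[env_hash] = env
    return env
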
28 changes: 14 additions & 14 deletions src/kiara/registries/data/data_store/filesystem_store.py
@@ -182,20 +182,20 @@ def get_path(
result.mkdir(parents=True, exist_ok=True)
return result

-def _retrieve_environment_details(
-    self, env_type: str, env_hash: str
-) -> Mapping[str, Any]:
-
-    base_path = self.get_path(entity_type=EntityType.ENVIRONMENT)
-    env_details_file = base_path / f"{env_type}_{env_hash}.json"
-
-    if not env_details_file.exists():
-        raise Exception(
-            f"Can't load environment details, file does not exist: {env_details_file.as_posix()}"
-        )
-
-    environment: Mapping[str, Any] = orjson.loads(env_details_file.read_text())
-    return environment
+# def _retrieve_environment_details(
+#     self, env_type: str, env_hash: str
+# ) -> Mapping[str, Any]:
+#
+#     base_path = self.get_path(entity_type=EntityType.ENVIRONMENT)
+#     env_details_file = base_path / f"{env_type}_{env_hash}.json"
+#
+#     if not env_details_file.exists():
+#         raise Exception(
+#             f"Can't load environment details, file does not exist: {env_details_file.as_posix()}"
+#         )
+#
+#     environment: Mapping[str, Any] = orjson.loads(env_details_file.read_text())
+#     return environment

def retrieve_all_job_hashes(
self,