Commit

🎨 Working Curator
Signed-off-by: zethson <lukas.heumos@posteo.net>
Zethson committed Dec 12, 2024
1 parent 8149d04 commit 2701913
Showing 2 changed files with 29 additions and 19 deletions.
2 changes: 1 addition & 1 deletion lamin_spatial/__init__.py
@@ -8,4 +8,4 @@

__version__ = "0.1.0" # denote a pre-release for 0.1.0 with 0.1rc1

from spatialdata_curator import SpatialDataCurator
from lamin_spatial.spatialdata_curator import SpatialDataCurator
46 changes: 28 additions & 18 deletions lamin_spatial/spatialdata_curator.py
@@ -1,3 +1,4 @@
import random
from typing import Any, Iterable, MutableMapping

import lamindb_setup as ln_setup
@@ -80,8 +81,10 @@ def __init__(
self._sdata = sdata
self._kwargs = {"organism": organism} if organism else {}
self._var_fields = var_index
self._verify_accessor(self._var_fields.keys())
self._categoricals = categoricals # TODO Verify existence and no overlap
# TODO we should properly check for sample. This currently fails because we hard coded spatialdata-db as the attrs key
self._verify_accessor(self._var_fields.keys() - {"sample"})
# TODO consider splitting this up into two types of keys -> sap: for sample and tab: for table stuff
self._categoricals = categoricals
self._tables = set(self._var_fields.keys()) | set(
self._categoricals.keys() - {"sample"}
)
@@ -91,6 +94,7 @@ def __init__(
self._sample_metadata = self._sdata.get_attrs(
key="spatialdata-db", return_as="df", flatten=True
) # this key will need to be adapted in the future
self._validated = False

if "sample" in self._categoricals.keys():
self._sample_df_curator = DataFrameCurator(
@@ -139,10 +143,14 @@ def non_validated(self) -> dict[str, dict[str, list[str]]]:
def _verify_accessor(self, accessors: Iterable[str]):
"""Verify that the accessors exist."""
for acc in accessors:
if (
self._sdata.get_attrs(key=acc) is None
and acc not in self._sdata.tables.keys()
):
is_present = False
try:
self._sdata.get_attrs(key=acc)
is_present = True
except KeyError:
if acc in self._sdata.tables.keys():
is_present = True
if not is_present:
raise ValidationError(f"Accessor '{acc}' does not exist!")

def lookup(
@@ -169,7 +177,7 @@ def _update_registry_all(self):
self._sample_df_curator._update_registry_all(
validated_only=True, **self._kwargs
)
for _, adata_curator in self._mod_adata_curators.items():
for _, adata_curator in self._table_adata_curators.items():
adata_curator._update_registry_all(validated_only=True, **self._kwargs)

def add_new_from_var_index(self, table: str, organism: str | None = None, **kwargs):
@@ -219,7 +227,7 @@ def standardize(self, key: str, accessor: str | None = None):
Inplace modification of the dataset.
"""
if accessor in self._table_adata_curators:
if accessor in self._table_adata_curators.keys():
adata_curator = self._table_adata_curators[accessor]
adata_curator.standardize(key=key)
if accessor == "sample":
@@ -258,7 +266,7 @@ def validate(self, organism: str | None = None) -> bool:

obs_validated = True
if self._sample_df_curator:
logger.info('validating categoricals of "sample" metadata...')
logger.info("validating categoricals of 'sample' metadata...")
obs_validated &= self._sample_df_curator.validate(**self._kwargs)
self._non_validated["obs"] = self._sample_df_curator.non_validated # type: ignore
logger.print("")
@@ -281,11 +289,11 @@ def save_artifact(
revises: Artifact | None = None,
run: Run | None = None,
) -> Artifact:
"""Save the validated ``MuData`` and metadata.
"""Save the validated ``SpatialData`` and metadata.
Args:
description: A description of the ``MuData`` object.
key: A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.fcs"`.
description: A description of the ``SpatialData`` object.
key: A path-like key to reference artifact in default storage, e.g., `"myfolder/myfile.zarr"`.
Artifacts with the same key form a revision family.
revises: Previous version of the artifact. Triggers a revision.
run: The run that creates the artifact.
@@ -302,15 +310,18 @@ def save_artifact(
try:
settings.verbosity = "warning"

# Write the SpatialData object to cache
# TODO This should not be a random number but be done in a canonical lamin way - ask Sergei for feedback
write_path = f"{settings.cache_dir}/{random.randint(10**7, 10**8 - 1)}.zarr"
self._sdata.write(write_path)

# Create the Artifact and associate Artifact metadata
self._artifact = Artifact(
self._sdata,
write_path,
description=description,
columns_field=self._var_fields,
fields=self.categoricals,
key=key,
revises=revises,
run=run,
**self._kwargs,
)
# According to Tim it's not easy to calculate the number of observations.
# We'd have to write custom code to iterate over labels (which might not even exist at that point)
@@ -335,7 +346,6 @@ def _add_set_from_spatialdata(
obs_fields = {}
assert host._accessor == "spatialdata"

sdata = host.load()
feature_sets = {}

# sample features
@@ -346,7 +356,7 @@
# table features
for table, field in var_fields.items():
table_fs = parse_feature_sets_from_anndata(
sdata[table],
self._sdata[table],
var_field=field,
obs_field=obs_fields.get(table, Feature.name),
mute=mute,

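For context, a minimal usage sketch of the curator as changed in this commit, assuming the constructor arguments and method signatures visible in the diff. The dataset path, the "table" accessor name, the registry fields, and the nested shapes of var_index and categoricals are illustrative assumptions, not part of the commit.

# Hypothetical usage sketch (not part of the commit): driving SpatialDataCurator
# end to end. Only the class and method names come from the diff; everything else
# below is an assumption for illustration.
import bionty as bt
import spatialdata as sd

from lamin_spatial import SpatialDataCurator

# Assumed example dataset; any SpatialData object with a "table" AnnData would do.
sdata = sd.read_zarr("my_dataset.zarr")

curator = SpatialDataCurator(
    sdata,
    var_index={"table": bt.Gene.ensembl_gene_id},  # var index field per table accessor (assumed shape)
    categoricals={
        "sample": {"assay": bt.ExperimentalFactor.name},  # sample-level metadata read from attrs
        "table": {"cell_type": bt.CellType.name},  # obs columns of the "table" AnnData
    },
    organism="human",
)

# Validate categoricals; register missing var-index terms, standardize synonyms,
# and re-validate if the first pass fails.
if not curator.validate():
    curator.add_new_from_var_index(table="table")
    curator.standardize(key="cell_type", accessor="table")
    curator.validate()

# Write the SpatialData object to cache and register it as an Artifact,
# as implemented in save_artifact above.
artifact = curator.save_artifact(
    description="spatial dataset curated with lamin_spatial",
    key="myfolder/my_dataset.zarr",
)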