Skip to content

Commit

Permalink
🎨 Integrate ln.record into lns.DObject → ln.DObject (#400)
Browse files Browse the repository at this point in the history
Co-authored-by: Alex Wolf <f.alexander.wolf@gmail.com>
  • Loading branch information
sunnyosun and falexwolf authored Dec 9, 2022
1 parent fdfa436 commit 726fdd8
Show file tree
Hide file tree
Showing 20 changed files with 95 additions and 64 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
- name: Cache
uses: actions/cache@v3
env:
cache-name: cache-0
cache-name: cache-all
with:
path: |
.nox
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,4 @@ repos:
- id: pydocstyle
exclude: lamindb/schema/dev/__init__.py
args: # google style + __init__, see http://www.pydocstyle.org/en/stable/error_codes.html
- --ignore=D100,D101,D102,D103,D106,D107,D203,D204,D213,D215,D400,D401,D403,D404,D406,D407,D408,D409,D412,D413
- --ignore=D100,D101,D102,D103,D106,D107,D203,D204,D213,D215,D400,D401,D403,D404,D406,D407,D408,D409,D412,D413,D418
2 changes: 1 addition & 1 deletion docs/faq/ingest-bfx.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@
"metadata": {},
"outputs": [],
"source": [
"dobject = ln.record(filepath, source=run)"
"dobject = ln.DObject(filepath, source=run)"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion docs/faq/ingest-same-file-twice.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
"metadata": {},
"outputs": [],
"source": [
"dobject = ln.record(filepath)"
"dobject = ln.DObject(filepath)"
]
}
],
Expand Down
4 changes: 2 additions & 2 deletions docs/faq/streaming-objects.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@
"metadata": {},
"outputs": [],
"source": [
"pbmc68k_dobj = ln.record(pbmc68k, name=\"pbmc68k\", source=run)\n",
"pbmc3k_dobj = ln.record(pbmc3k, name=\"pbmc3k\", source=run)"
"pbmc68k_dobj = ln.DObject(pbmc68k, name=\"pbmc68k\", source=run)\n",
"pbmc3k_dobj = ln.DObject(pbmc3k, name=\"pbmc3k\", source=run)"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions docs/faq/zarr-ingest-load.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@
"metadata": {},
"outputs": [],
"source": [
"dobject = ln.record(adata, name=\"pbmc68k_reduced\", format=\"zarr\", source=run)"
"dobject = ln.DObject(adata, name=\"pbmc68k_reduced\", format=\"zarr\", source=run)"
]
},
{
Expand Down Expand Up @@ -164,7 +164,7 @@
"metadata": {},
"outputs": [],
"source": [
"adata = ln.load(dobject)"
"adata = dobject.load()"
]
},
{
Expand Down
6 changes: 3 additions & 3 deletions docs/guide/03-knowledge.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.12 ('base1')",
"display_name": "Python 3.10.8 64-bit",
"language": "python",
"name": "python3"
},
Expand All @@ -302,7 +302,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
"version": "3.10.8"
},
"nbproject": {
"id": "f7F0c2n2Ft1s",
Expand All @@ -316,7 +316,7 @@
},
"vscode": {
"interpreter": {
"hash": "2775e555cdc2d728c54aa22130c79afb1fa4da64f22f2fc6dcc2aa346c4e0672"
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
}
}
},
Expand Down
4 changes: 2 additions & 2 deletions docs/guide/04-ingest.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
"metadata": {},
"outputs": [],
"source": [
"dobject = ln.record(filepath)"
"dobject = ln.DObject(filepath)"
]
},
{
Expand Down Expand Up @@ -188,7 +188,7 @@
"metadata": {},
"outputs": [],
"source": [
"dobject = ln.record(df, name=\"iris\");"
"dobject = ln.DObject(df, name=\"iris\");"
]
},
{
Expand Down
6 changes: 3 additions & 3 deletions docs/guide/05-link-features.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@
"metadata": {},
"outputs": [],
"source": [
"dobject = ln.record(adata, name=\"Mouse Lymph Node scRNA-seq\", features_ref=reference)"
"dobject = ln.DObject(adata, name=\"Mouse Lymph Node scRNA-seq\", features_ref=reference)"
]
},
{
Expand Down Expand Up @@ -189,7 +189,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Because the file is a standard `.fcs` file, `ln.record` - under the hood - can parse it using [readfcs](https://lamin.ai/docs/readfcs).\n",
"Because the file is a standard `.fcs` file, `ln.DObject` - under the hood - can parse it using [readfcs](https://lamin.ai/docs/readfcs).\n",
"\n",
"Alternatively, we can load it into memory: `AnnData = readfcs.read_fcs(filepath)`."
]
Expand All @@ -216,7 +216,7 @@
"metadata": {},
"outputs": [],
"source": [
"dobject = ln.record(filepath, features_ref=reference);"
"dobject = ln.DObject(filepath, features_ref=reference);"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions docs/guide/06-select-load.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@
"source": [
"dobject = ln.select(lns.DObject, name=\"iris\").first()\n",
"\n",
"df = ln.load(dobject)"
"df = dobject.load()"
]
},
{
Expand Down Expand Up @@ -491,7 +491,7 @@
"metadata": {},
"outputs": [],
"source": [
"ln.load(dobject)"
"dobject.load()"
]
},
{
Expand Down
8 changes: 4 additions & 4 deletions docs/guide/07-link-samples.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@
"metadata": {},
"outputs": [],
"source": [
"tissue = lns.bionty.Tissue(id=tissue_lookup.lymph_node)"
"tissue = lns.bionty.Tissue(ontology_id=tissue_lookup.lymph_node)"
]
},
{
Expand Down Expand Up @@ -328,7 +328,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3.10.8 64-bit",
"language": "python",
"name": "python3"
},
Expand All @@ -342,7 +342,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
"version": "3.10.8"
},
"nbproject": {
"id": "zMCvXplQ8kTk",
Expand All @@ -353,7 +353,7 @@
},
"vscode": {
"interpreter": {
"hash": "2775e555cdc2d728c54aa22130c79afb1fa4da64f22f2fc6dcc2aa346c4e0672"
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
}
}
},
Expand Down
2 changes: 1 addition & 1 deletion docs/guide/09-pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@
"metadata": {},
"outputs": [],
"source": [
"dobject = ln.record(filepath, source=run)"
"dobject = ln.DObject(filepath, source=run)"
]
},
{
Expand Down
7 changes: 3 additions & 4 deletions lamindb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@
.. autosummary::
:toctree: .
record
DObject
add
load
subset
delete
Expand Down Expand Up @@ -69,13 +68,13 @@
raise RuntimeError("Please run `lndb init` to configure an instance.")
_check_migrate(usettings=settings.user, isettings=settings.instance)

from lnschema_core import DObject # noqa

from . import dev # noqa
from . import knowledge # noqa
from . import nb # noqa
from . import schema # noqa
from ._delete import delete # noqa
from ._load import load # noqa
from ._record import record # noqa
from ._subset import subset
from ._view import view # noqa
from .dev.db import session # noqa
Expand Down
12 changes: 6 additions & 6 deletions lamindb/_check_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
from nbproject import __version__ as nbproject_v
from packaging import version

if version.parse(lndb_setup_v) != version.parse("0.24.3"):
raise RuntimeError("lamindb needs lndb_setup==0.24.3")
if version.parse(lndb_setup_v) != version.parse("0.24.4"):
raise RuntimeError("lamindb needs lndb_setup==0.24.4")

if version.parse(lnschema_core_v) != version.parse("0.22.1"):
raise RuntimeError("lamindb needs lnschema_core==0.22.1")
if version.parse(lnschema_core_v) != version.parse("0.23.1"):
raise RuntimeError("lamindb needs lnschema_core==0.23.1")

if version.parse(bionty_v) != version.parse("0.5.6"):
raise RuntimeError("lamindb needs bionty==0.5.6")
if version.parse(bionty_v) != version.parse("0.5.7"):
raise RuntimeError("lamindb needs bionty==0.5.7")

if version.parse(nbproject_v) < version.parse("0.8.0"):
raise RuntimeError("lamindb needs nbproject>=0.8.0")
35 changes: 19 additions & 16 deletions lamindb/_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
import pandas as pd
from lamin_logger import logger
from lndb_setup import settings
from lnschema_core import DObject, Features, Run, Storage
from lnschema_core import DObject as lns_DObject
from lnschema_core import Features, Run, Storage
from typeguard import typechecked

from lamindb.knowledge import CellMarker, Gene, Protein
Expand Down Expand Up @@ -45,7 +46,7 @@ def serialize(
def get_hash(local_filepath, suffix):
if suffix != ".zarr": # if not streamed
hash = hash_file(local_filepath)
result = select(DObject, hash=hash).one_or_none()
result = select(lns_DObject, hash=hash).one_or_none()
if result is not None:
logger.warning(
"Based on the MD5 hash, the same data object is already"
Expand Down Expand Up @@ -153,11 +154,11 @@ def parse_features(
return features


def get_features(dobject, features_ref):
def get_features(dobject_privates, features_ref):
"""Updates dobject in place."""
memory_rep = dobject._memory_rep
memory_rep = dobject_privates["_memory_rep"]
if memory_rep is None:
memory_rep = load_to_memory(dobject._local_filepath)
memory_rep = load_to_memory(dobject_privates["_local_filepath"])
try:
df = getattr(memory_rep, "var") # for AnnData objects
if callable(df):
Expand All @@ -178,15 +179,14 @@ def get_run(run: Optional[Run]) -> Run:


@typechecked
def record(
def get_dobject_kwargs_from_data(
data: Union[Path, str, pd.DataFrame, ad.AnnData],
*,
name: Optional[str] = None,
features_ref: Optional[Union[CellMarker, Gene, Protein]] = None,
source: Optional[Run] = None,
id: Optional[str] = None,
format: Optional[str] = None,
) -> DObject:
):
"""Record a data object.
Guide: :doc:`/db/guide/ingest`.
Expand All @@ -207,22 +207,25 @@ def record(
size = size_adata(memory_rep)
hash = get_hash(local_filepath, suffix)
storage = select(Storage, root=str(settings.instance.storage_root)).one()
dobject = DObject(
dobject_privates = dict(
_local_filepath=local_filepath,
_memory_rep=memory_rep,
)
if features_ref is not None:
features = [get_features(dobject_privates, features_ref)] # has to be list!
else:
features = []
dobject_kwargs = dict(
name=name,
suffix=suffix,
hash=hash,
run_id=run.id,
size=size,
storage_id=storage.id,
source=run,
features=features,
)
if id is not None: # cannot pass it into constructor due to default factory
dobject.id = id
dobject._local_filepath = local_filepath
dobject._memory_rep = memory_rep
if features_ref is not None:
dobject.features.append(get_features(dobject, features_ref))
return dobject
return dobject_kwargs, dobject_privates


def to_b64_str(bstr: bytes):
Expand Down
3 changes: 2 additions & 1 deletion lamindb/dev/db/_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from ..file import store_file, write_adata_zarr
from ..file._file import print_hook
from ._core import dobject_to_sqm
from ._select import select


Expand Down Expand Up @@ -65,7 +66,7 @@ def add( # type: ignore # no support of different naming of args across overlo
elif isinstance(record, sqm.SQLModel):
records = [record]
else:
model = record
model = dobject_to_sqm(record)
results = select(model, **fields).all()
if len(results) == 1:
return results[0]
Expand Down
19 changes: 19 additions & 0 deletions lamindb/dev/db/_core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from typing import Tuple, Union

import sqlmodel as sqm
from lndb_setup import settings
from lnschema_core import DObject


def session() -> sqm.Session:
Expand All @@ -8,3 +11,19 @@ def session() -> sqm.Session:
Returns a `sqlmodel.Session` object.
"""
return settings.instance.session()


def dobject_to_sqm(entity: Union[sqm.SQLModel, Tuple[sqm.SQLModel]]):
    """Swap any plain class named ``DObject`` for the ``DObject`` imported here.

    A candidate is swapped only when the name of its own class is exactly
    ``"type"`` and its ``__name__`` is ``"DObject"``; every other value is
    passed through untouched.

    NOTE(review): presumably this maps a user-facing ``DObject`` wrapper
    class back onto the ``lnschema_core`` model before querying -- confirm
    against the callers.

    Args:
        entity: A single entity, or a tuple of entities.

    Returns:
        For a tuple input, a ``list`` (not a tuple) holding the resolved
        members in their original order; otherwise the single resolved
        entity.
    """

    def _resolve(candidate):
        # Short-circuiting matters: `__name__` is only read once the
        # candidate's class is known to be named "type".
        is_plain_class = candidate.__class__.__name__ == "type"
        if is_plain_class and candidate.__name__ == "DObject":
            return DObject
        return candidate

    if not isinstance(entity, tuple):
        return _resolve(entity)
    return [_resolve(member) for member in entity]
Loading

0 comments on commit 726fdd8

Please sign in to comment.