Skip to content
This repository has been archived by the owner on Jan 2, 2025. It is now read-only.

Commit

Permalink
🚚 Remove reference ORM from Label and add it to Feature, add slot to …
Browse files Browse the repository at this point in the history
…FeatureSet link models (#238)

* 🚚 Remove reference ORM from Label and add it to Feature

* 🚚 Add slot to FeatureSet link models

* 📝 Clarify in docstrings

* 💚 Fix

* 🚚 Add features property

* 💚 Try to fix

* 💚 Try to fix

* 💚 Fix

* 📝 Better documentation

* 📝 Prettify docs

* 💚 Fix column name

* ✏️ Fix typo

* 💚 Fix
  • Loading branch information
falexwolf authored Jul 24, 2023
1 parent ab79655 commit 76ea269
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 12 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Generated by Django 4.2.2 on 2023-07-24 16:50

import django.db.models.deletion
from django.db import migrations, models


class Migration(migrations.Migration):
    """Move the reference-ORM columns from ``label`` to ``feature`` and add link models.

    The operations, in order:

    1. Remove ``ref_id``, ``ref_orm`` and ``ref_schema`` from ``label``.
    2. Add ``labels_orm`` and ``labels_schema`` to ``feature``.
    3. Create the ``FileFeatureSet`` and ``DatasetFeatureSet`` models, each with a
       nullable ``slot`` column and a unique constraint on its pair of foreign keys.
    4. Replace the ``feature_sets`` many-to-many fields on ``file`` and ``dataset``
       with fields that go ``through`` the new models.
    5. Carry the rows of ``lnschema_core_file_feature_sets`` across that replacement
       via a temporary table.

    The order of the operations matters: rows are stashed before the old field is
    removed and restored only after the new field exists.
    """

    dependencies = [
        ("lnschema_core", "0011_label_remove_tag_created_by_remove_tag_parents_and_more"),
    ]

    operations = [
        # --- label: drop the three reference columns -------------------------------
        migrations.RemoveField(
            model_name="label",
            name="ref_id",
        ),
        migrations.RemoveField(
            model_name="label",
            name="ref_orm",
        ),
        migrations.RemoveField(
            model_name="label",
            name="ref_schema",
        ),
        # --- feature: add nullable, indexed columns (max 40 chars each) -------------
        migrations.AddField(
            model_name="feature",
            name="labels_orm",
            field=models.CharField(db_index=True, default=None, max_length=40, null=True),
        ),
        migrations.AddField(
            model_name="feature",
            name="labels_schema",
            field=models.CharField(db_index=True, default=None, max_length=40, null=True),
        ),
        # --- explicit link models between file/dataset and featureset ---------------
        migrations.CreateModel(
            name="FileFeatureSet",
            fields=[
                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
                ("slot", models.CharField(default=None, max_length=40, null=True)),
                ("featureset", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="lnschema_core.featureset")),
                ("file", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="lnschema_core.file")),
            ],
            options={
                "unique_together": {("file", "featureset")},
            },
        ),
        migrations.CreateModel(
            name="DatasetFeatureSet",
            fields=[
                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
                # NOTE(review): max_length is 50 here but 40 on FileFeatureSet.slot -- confirm this is intended.
                ("slot", models.CharField(default=None, max_length=50, null=True)),
                ("dataset", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="lnschema_core.dataset")),
                ("featureset", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="lnschema_core.featureset")),
            ],
            options={
                "unique_together": {("dataset", "featureset")},
            },
        ),
        # --- stash existing file <-> featureset rows ---------------------------------
        # The rows are copied out of lnschema_core_file_feature_sets before the
        # feature_sets field on file is removed below. Presumably that table is the
        # implicit many-to-many table behind file.feature_sets and would be dropped
        # by the RemoveField -- confirm.
        # NOTE(review): none of the RunSQL operations in this list pass reverse_sql.
        migrations.RunSQL("CREATE TABLE lnschema_core_filefeatureset_tmp (id BIGINT, file_id TEXT, featureset_id TEXT)"),
        migrations.RunSQL("INSERT INTO lnschema_core_filefeatureset_tmp (id, file_id, featureset_id) SELECT id, file_id, featureset_id from lnschema_core_file_feature_sets"),
        # --- swap the many-to-many fields for ones using the link models ------------
        migrations.RemoveField(
            model_name="file",
            name="feature_sets",
        ),
        # NOTE(review): no rows are stashed for dataset.feature_sets before this
        # removal; only the file links are copied. Confirm that is intentional.
        migrations.RemoveField(
            model_name="dataset",
            name="feature_sets",
        ),
        migrations.AddField(
            model_name="dataset",
            name="feature_sets",
            field=models.ManyToManyField(related_name="datasets", through="lnschema_core.DatasetFeatureSet", to="lnschema_core.featureset"),
        ),
        migrations.AddField(
            model_name="file",
            name="feature_sets",
            field=models.ManyToManyField(related_name="files", through="lnschema_core.FileFeatureSet", to="lnschema_core.featureset"),
        ),
        # --- restore the stashed rows into the new link table and clean up ----------
        # slot is not in the column list, so it is not populated by this insert
        # (presumably left NULL, since the column is nullable).
        migrations.RunSQL("INSERT INTO lnschema_core_filefeatureset (id, file_id, featureset_id) SELECT id, file_id, featureset_id from lnschema_core_filefeatureset_tmp"),
        migrations.RunSQL("DROP TABLE lnschema_core_filefeatureset_tmp"),
    ]
47 changes: 35 additions & 12 deletions lnschema_core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,12 +732,6 @@ class Label(ORM):
"""Parent labels, useful to hierarchically group labels (optional)."""
feature = models.ForeignKey("Feature", CASCADE, related_name="labels", null=True, default=None)
"""The feature in which the label is sampled (optional)."""
ref_id = models.CharField(max_length=20, default=None, null=True)
"""Record from a reference ontology (optional)."""
ref_orm = models.CharField(max_length=30, default=None, null=True)
"""ORM providing the reference ontology (optional)."""
ref_schema = models.CharField(max_length=30, default=None, null=True)
"""Schema of the ORM (optional)."""
created_at = models.DateTimeField(auto_now_add=True, db_index=True)
"""Time of creation of record."""
updated_at = models.DateTimeField(auto_now=True, db_index=True)
Expand Down Expand Up @@ -805,16 +799,20 @@ class Feature(ORM):
name = models.CharField(max_length=255, db_index=True, default=None)
"""Name of feature (required)."""
type = models.CharField(max_length=64, db_index=True, default=None)
"""Simple type ("float", "int", "str", "categorical").
"""Simple type ("float", "int", "str", "category").
If "categorical", consider managing categories with :class:`~lamindb.Label`.
If "category", consider managing categories with :class:`~lamindb.Label` or another label ORM.
"""
unit = models.CharField(max_length=30, db_index=True, null=True, default=None)
"""Unit of measure, ideally SI (`m`, `s`, `kg`, etc.) or 'normalized' etc."""
"""Unit of measure, ideally SI (`m`, `s`, `kg`, etc.) or 'normalized' etc. (optional)"""
description = models.TextField(db_index=True, null=True, default=None)
"""A description."""
labels_orm = models.CharField(max_length=40, db_index=True, default=None, null=True)
"""ORM providing the vocabulary for labels, e.g., :class:`lnschema_bionty.CellLine` (optional)."""
labels_schema = models.CharField(max_length=40, db_index=True, default=None, null=True)
"""Schema of the ORM (optional)."""
synonyms = models.TextField(null=True, default=None)
"""Bar-separated (|) synonyms."""
"""Bar-separated (|) synonyms (optional)."""
feature_sets = models.ManyToManyField("FeatureSet", related_name="features")
"""Feature sets linked to this feature."""
created_at = models.DateTimeField(auto_now_add=True, db_index=True)
Expand Down Expand Up @@ -1107,7 +1105,7 @@ class File(ORM):
"""
hash_type = models.CharField(max_length=30, db_index=True, null=True, default=None)
"""Type of hash."""
feature_sets = models.ManyToManyField(FeatureSet, related_name="files")
feature_sets = models.ManyToManyField(FeatureSet, related_name="files", through="FileFeatureSet")
"""The feature sets measured in the file (see :class:`~lamindb.FeatureSet`)."""
transform = models.ForeignKey(Transform, PROTECT, related_name="files", null=True, default=None)
""":class:`~lamindb.Transform` whose run created the `file`."""
Expand Down Expand Up @@ -1454,6 +1452,13 @@ def save(self, *args, **kwargs) -> None:
"""
pass

@property
def features(self):
    """Feature manager for this record.

    Builds a new :class:`~lamindb.dev.FeatureManager` around ``self`` on every
    access.
    """
    # Imported at access time rather than at module load.
    from lamindb._feature_manager import FeatureManager as _FeatureManager

    manager = _FeatureManager(self)
    return manager


class Dataset(ORM):
"""Datasets.
Expand Down Expand Up @@ -1509,7 +1514,7 @@ class Dataset(ORM):
"""A description."""
hash = models.CharField(max_length=86, db_index=True, null=True, default=None)
"""Hash of dataset content. 86 base64 chars allow to store 64 bytes, 512 bits."""
feature_sets = models.ManyToManyField("FeatureSet", related_name="datasets")
feature_sets = models.ManyToManyField("FeatureSet", related_name="datasets", through="DatasetFeatureSet")
"""The feature sets measured in this dataset (see :class:`~lamindb.FeatureSet`)."""
labels = models.ManyToManyField("Label", related_name="datasets")
"""Categories of categorical features sampled in the dataset (see :class:`~lamindb.Feature`)."""
Expand All @@ -1525,6 +1530,24 @@ class Dataset(ORM):
"""Creator of record, a :class:`~lamindb.User`."""


class FileFeatureSet(ORM):
    """Link record connecting one :class:`File` to one :class:`FeatureSet`.

    A given (file, featureset) pair can be stored at most once.
    """

    file = models.ForeignKey(File, models.CASCADE)
    """The linked file; the link is deleted together with it."""
    featureset = models.ForeignKey(FeatureSet, models.CASCADE)
    """The linked feature set; the link is deleted together with it."""
    slot = models.CharField(default=None, null=True, max_length=40)
    """Slot string of up to 40 characters (optional)."""

    class Meta:
        unique_together = (("file", "featureset"),)


class DatasetFeatureSet(ORM):
    """Link record connecting one :class:`Dataset` to one :class:`FeatureSet`.

    A given (dataset, featureset) pair can be stored at most once.
    """

    dataset = models.ForeignKey(Dataset, models.CASCADE)
    """The linked dataset; the link is deleted together with it."""
    featureset = models.ForeignKey(FeatureSet, models.CASCADE)
    """The linked feature set; the link is deleted together with it."""
    slot = models.CharField(default=None, null=True, max_length=50)
    """Slot string of up to 50 characters (optional)."""

    class Meta:
        unique_together = (("dataset", "featureset"),)


# -------------------------------------------------------------------------------------
# Low-level logic needed in lamindb-setup

Expand Down

0 comments on commit 76ea269

Please sign in to comment.