Prevent delete orphans #1002

Merged
merged 21 commits into from
Jun 26, 2024
Changes from 16 commits
2 changes: 2 additions & 0 deletions .gitignore
@@ -60,6 +60,7 @@ coverage.xml
.hypothesis/
.pytest_cache/
tests/_data/*
wget-log*

# Translations
*.mo
@@ -128,6 +129,7 @@ dmypy.json
.pyre/

# Test Data Files
tests/_data/*
*.dat
*.mda
*.rec
5 changes: 4 additions & 1 deletion CHANGELOG.md
@@ -19,7 +19,10 @@
- Add pytests for position pipeline, various `test_mode` exceptions #966
- Migrate `pip` dependencies from `environment.yml`s to `pyproject.toml` #966
- Add documentation for common error messages #997
- Allow mixin tables with parallelization in `make` to run populate with `processes > 1` #1001
- Expand `delete_downstream_merge` -> `delete_downstream_parts`. #1002
- `cautious_delete` now checks `IntervalList` and externals tables. #1002
- Allow mixin tables with parallelization in `make` to run populate with
`processes > 1` #1001

### Pipelines

37 changes: 23 additions & 14 deletions docs/src/misc/mixin.md
@@ -136,29 +136,38 @@ masters, or null entry masters without matching data.

For [Merge tables](./merge_tables.md), this is a significant problem. If a user
wants to delete all entries associated with a given session, she must find all
Merge entries and delete them in the correct order. The mixin provides a
function, `delete_downstream_merge`, to handle this, which is run by default
when calling `delete`.
part table entries, including Merge tables, and delete them in the correct
order. The mixin provides a function, `delete_downstream_parts`, to handle this,
which is run by default when calling `delete`.

`delete_downstream_merge`, also aliased as `ddm`, identifies all Merge tables
downstream of where it is called. If `dry_run=True`, it will return a list of
entries that would be deleted, otherwise it will delete them.
`delete_downstream_parts`, also aliased as `ddp`, identifies all part tables
with foreign key references downstream of where it is called. If `dry_run=True`,
it will return a list of entries that would be deleted, otherwise it will delete
them.

Importantly, `delete_downstream_merge` cannot properly interact with tables that
Importantly, `delete_downstream_parts` cannot properly interact with tables that
have not been imported into the current namespace. If you are having trouble
with part deletion errors, import the offending table and rerun the function
with `reload_cache=True`.

```python
import datajoint as dj
from spyglass.common import Nwbfile

restricted_nwbfile = Nwbfile() & "nwb_file_name LIKE 'Name%'"
restricted_nwbfile.delete_downstream_merge(dry_run=False)
# DataJointError("Attempt to delete part table MyMerge.Part before ...

vanilla_dj_table = dj.FreeTable(dj.conn(), Nwbfile.full_table_name)
vanilla_dj_table.delete()
# DataJointError("Attempt to delete part table MyMerge.Part before ... ")

restricted_nwbfile.delete()
# [WARNING] Spyglass: No part deletes found w/ Nwbfile ...
# OR
# ValueError("Please import MyMerge and try again.")

from spyglass.example import MyMerge

restricted_nwbfile.delete_downstream_merge(reload_cache=True, dry_run=False)
restricted_nwbfile.delete_downstream_parts(reload_cache=True, dry_run=False)
```

Because each table keeps a cache of downstream merge tables, it is important to
@@ -169,13 +178,13 @@ Speed gains can also be achieved by avoiding re-instancing the table each time.
# Slow
from spyglass.common import Nwbfile

(Nwbfile() & "nwb_file_name LIKE 'Name%'").ddm(dry_run=False)
(Nwbfile() & "nwb_file_name LIKE 'Other%'").ddm(dry_run=False)
(Nwbfile() & "nwb_file_name LIKE 'Name%'").ddp(dry_run=False)
(Nwbfile() & "nwb_file_name LIKE 'Other%'").ddp(dry_run=False)

# Faster
from spyglass.common import Nwbfile

nwbfile = Nwbfile()
(nwbfile & "nwb_file_name LIKE 'Name%'").ddm(dry_run=False)
(nwbfile & "nwb_file_name LIKE 'Other%'").ddm(dry_run=False)
(nwbfile & "nwb_file_name LIKE 'Name%'").ddp(dry_run=False)
(nwbfile & "nwb_file_name LIKE 'Other%'").ddp(dry_run=False)
```
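
As a supplement to the docs above, a dry run can be previewed before committing to a delete. A minimal sketch, assuming the same `Nwbfile` restriction as in the example:

```python
from spyglass.common import Nwbfile

restricted_nwbfile = Nwbfile() & "nwb_file_name LIKE 'Name%'"

# dry_run=True (the default) returns the part table entries that would be
# deleted, without deleting them.
would_delete = restricted_nwbfile.delete_downstream_parts(dry_run=True)
print(would_delete)

# After review, run the delete for real.
restricted_nwbfile.delete_downstream_parts(dry_run=False)
```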
2 changes: 1 addition & 1 deletion notebooks/01_Insert_Data.ipynb
@@ -2134,7 +2134,7 @@
"```python\n",
"nwbfile = sgc.Nwbfile()\n",
"\n",
"(nwbfile & {\"nwb_file_name\": nwb_copy_file_name}).delete_downstream_merge(\n",
"(nwbfile & {\"nwb_file_name\": nwb_copy_file_name}).delete_downstream_parts(\n",
" dry_run=False, # True will show Merge Table entries that would be deleted\n",
")\n",
"```\n",
10 changes: 5 additions & 5 deletions notebooks/03_Merge_Tables.ipynb
@@ -90,7 +90,7 @@
"import spyglass.common as sgc\n",
"import spyglass.lfp as lfp\n",
"from spyglass.utils.nwb_helper_fn import get_nwb_copy_filename\n",
"from spyglass.utils.dj_merge_tables import delete_downstream_merge, Merge\n",
"from spyglass.utils.dj_merge_tables import delete_downstream_parts, Merge\n",
"from spyglass.common.common_ephys import LFP as CommonLFP # Upstream 1\n",
"from spyglass.lfp.lfp_merge import LFPOutput # Merge Table\n",
"from spyglass.lfp.v1.lfp import LFPV1 # Upstream 2"
@@ -955,8 +955,8 @@
"2. use `merge_delete_parent` to delete from the parent sources, getting rid of\n",
" the entries in the source table they came from.\n",
"\n",
"3. use `delete_downstream_merge` to find Merge Tables downstream of any other\n",
" table and get rid full entries, avoiding orphaned master table entries.\n",
"3. use `delete_downstream_parts` to find downstream part tables, like Merge \n",
" Tables, and get rid full entries, avoiding orphaned master table entries.\n",
"\n",
"The two latter cases can be destructive, so we include an extra layer of\n",
"protection with `dry_run`. When true (by default), these functions return\n",
@@ -1016,7 +1016,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"`delete_downstream_merge` is available from any other table in the pipeline,\n",
"`delete_downstream_parts` is available from any other table in the pipeline,\n",
"but it does take some time to find the links downstream. If you're using this,\n",
"you can save time by reassigning your table to a variable, which will preserve\n",
"a copy of the previous search.\n",
Expand Down Expand Up @@ -1056,7 +1056,7 @@
"source": [
"nwbfile = sgc.Nwbfile()\n",
"\n",
"(nwbfile & nwb_file_dict).delete_downstream_merge(\n",
"(nwbfile & nwb_file_dict).delete_downstream_parts(\n",
" dry_run=True,\n",
" reload_cache=False, # if still encountering errors, try setting this to True\n",
")"
2 changes: 1 addition & 1 deletion notebooks/py_scripts/01_Insert_Data.py
@@ -378,7 +378,7 @@
# ```python
# nwbfile = sgc.Nwbfile()
#
# (nwbfile & {"nwb_file_name": nwb_copy_file_name}).delete_downstream_merge(
# (nwbfile & {"nwb_file_name": nwb_copy_file_name}).delete_downstream_parts(
# dry_run=False, # True will show Merge Table entries that would be deleted
# )
# ```
12 changes: 6 additions & 6 deletions notebooks/py_scripts/03_Merge_Tables.py
@@ -5,7 +5,7 @@
# extension: .py
# format_name: light
# format_version: '1.5'
# jupytext_version: 1.15.2
# jupytext_version: 1.16.0
# kernelspec:
# display_name: spy
# language: python
@@ -64,7 +64,7 @@
import spyglass.common as sgc
import spyglass.lfp as lfp
from spyglass.utils.nwb_helper_fn import get_nwb_copy_filename
from spyglass.utils.dj_merge_tables import delete_downstream_merge, Merge
from spyglass.utils.dj_merge_tables import delete_downstream_parts, Merge
from spyglass.common.common_ephys import LFP as CommonLFP # Upstream 1
from spyglass.lfp.lfp_merge import LFPOutput # Merge Table
from spyglass.lfp.v1.lfp import LFPV1 # Upstream 2
@@ -192,8 +192,8 @@
# 2. use `merge_delete_parent` to delete from the parent sources, getting rid of
# the entries in the source table they came from.
#
# 3. use `delete_downstream_merge` to find Merge Tables downstream of any other
#    table and get rid of full entries, avoiding orphaned master table entries.
# 3. use `delete_downstream_parts` to find downstream part tables, like Merge
#    Tables, and get rid of full entries, avoiding orphaned master table entries.
#
# The two latter cases can be destructive, so we include an extra layer of
# protection with `dry_run`. When true (by default), these functions return
@@ -204,7 +204,7 @@

LFPOutput.merge_delete_parent(restriction=nwb_file_dict, dry_run=True)

# `delete_downstream_merge` is available from any other table in the pipeline,
# `delete_downstream_parts` is available from any other table in the pipeline,
# but it does take some time to find the links downstream. If you're using this,
# you can save time by reassigning your table to a variable, which will preserve
# a copy of the previous search.
@@ -216,7 +216,7 @@
# +
nwbfile = sgc.Nwbfile()

(nwbfile & nwb_file_dict).delete_downstream_merge(
(nwbfile & nwb_file_dict).delete_downstream_parts(
dry_run=True,
reload_cache=False, # if still encountering errors, try setting this to True
)
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -125,8 +125,8 @@ ignore-words-list = 'nevers'
minversion = "7.0"
addopts = [
# "-sv", # no capture, verbose output
# "--sw", # stepwise: resume with next test after failure
# "--pdb", # drop into debugger on failure
"--sw", # stepwise: resume with next test after failure
"--pdb", # drop into debugger on failure
"-p no:warnings",
# "--no-teardown", # don't teardown the database after tests
# "--quiet-spy", # don't show logging from spyglass
7 changes: 7 additions & 0 deletions src/spyglass/common/common_interval.py
@@ -7,6 +7,7 @@
import pandas as pd

from spyglass.utils import SpyglassMixin, logger
from spyglass.utils.dj_helper_fn import get_child_tables

from .common_session import Session # noqa: F401

@@ -152,6 +153,12 @@ def plot_epoch_pos_raw_intervals(self, figsize=(20, 5), return_fig=False):
if return_fig:
return fig

def nightly_cleanup(self, dry_run=True):
orphans = self - get_child_tables(self)
if dry_run:
return orphans
orphans.super_delete(warn=False)


def intervals_by_length(interval_list, min_length=0.0, max_length=1e10):
"""Select intervals of certain lengths from an interval list.
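
A minimal usage sketch for the new `nightly_cleanup` method, assuming it is called on the `IntervalList` table defined in this module:

```python
from spyglass.common import IntervalList

# dry_run=True (the default) returns orphaned entries, i.e., interval lists
# with no referencing child table entries, without deleting them.
orphans = IntervalList().nightly_cleanup(dry_run=True)
print(orphans)

# dry_run=False removes the orphans via super_delete, suppressing the usual
# mixin warning.
IntervalList().nightly_cleanup(dry_run=False)
```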
21 changes: 12 additions & 9 deletions src/spyglass/common/common_usage.py
@@ -15,7 +15,7 @@
from pynwb import NWBHDF5IO

from spyglass.common.common_nwbfile import AnalysisNwbfile, Nwbfile
from spyglass.settings import export_dir
from spyglass.settings import export_dir, test_mode
from spyglass.utils import SpyglassMixin, logger
from spyglass.utils.dj_graph import RestrGraph
from spyglass.utils.dj_helper_fn import (
@@ -103,7 +103,8 @@ def insert1_return_pk(self, key: dict, **kwargs) -> int:
export_id = query.fetch1("export_id")
export_key = {"export_id": export_id}
if query := (Export & export_key):
query.super_delete(warn=False)
safemode = False if test_mode else None # No prompt in tests
query.super_delete(warn=False, safemode=safemode)
logger.info(f"{status} {export_key}")
return export_id

@@ -174,9 +175,11 @@ def _max_export_id(self, paper_id: str, return_all=False) -> int:
all_export_ids = query.fetch("export_id")
return all_export_ids if return_all else max(all_export_ids)

def paper_export_id(self, paper_id: str) -> dict:
def paper_export_id(self, paper_id: str, return_all=False) -> dict:
"""Return the maximum export_id for a paper, used to populate Export."""
return {"export_id": self._max_export_id(paper_id)}
if not return_all:
return {"export_id": self._max_export_id(paper_id)}
return [{"export_id": id} for id in self._max_export_id(paper_id, True)]


@schema
@@ -215,11 +218,11 @@ def populate_paper(self, paper_id: Union[str, dict]):
self.populate(ExportSelection().paper_export_id(paper_id))

def make(self, key):
query = ExportSelection & key
paper_key = query.fetch("paper_id", as_dict=True)[0]
paper_key = (ExportSelection & key).fetch("paper_id", as_dict=True)[0]
query = ExportSelection & paper_key

# Null insertion if export_id is not the maximum for the paper
all_export_ids = query._max_export_id(paper_key, return_all=True)
all_export_ids = ExportSelection()._max_export_id(paper_key, True)
max_export_id = max(all_export_ids)
if key.get("export_id") != max_export_id:
logger.info(
@@ -240,7 +243,7 @@ def make(self, key):
(self.Table & id_dict).delete_quick()
(self.Table & id_dict).delete_quick()

restr_graph = query.get_restr_graph(paper_key)
restr_graph = ExportSelection().get_restr_graph(paper_key)
file_paths = unique_dicts( # Original plus upstream files
query.list_file_paths(paper_key) + restr_graph.file_paths
)
@@ -256,7 +259,7 @@
# Writes but does not run mysqldump. Assumes single version per paper.
version_key = query.fetch("spyglass_version", as_dict=True)[0]
self.write_export(
free_tables=restr_graph.all_ft, **paper_key, **version_key
free_tables=restr_graph.restr_ft, **paper_key, **version_key
)

self.insert1({**key, **paper_key})
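
A brief sketch of how the new `return_all` flag behaves, using a hypothetical paper ID:

```python
from spyglass.common.common_usage import Export, ExportSelection

# Default: a single key for the paper's maximum export_id.
key = ExportSelection().paper_export_id("paper_01")  # {"export_id": N}

# return_all=True: one key dict per export_id logged for the paper.
keys = ExportSelection().paper_export_id("paper_01", return_all=True)

# populate_paper uses the single-key form internally (see make above).
Export().populate_paper("paper_01")
```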
1 change: 1 addition & 0 deletions src/spyglass/decoding/v0/core.py
@@ -13,6 +13,7 @@
ObservationModel,
)
except ImportError as e:
RandomWalk, Uniform, Environment, ObservationModel = None, None, None, None
logger.warning(e)

from spyglass.common.common_behav import PositionIntervalMap, RawPosition
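
The null-assignment fallback above keeps the module importable when the optional decoding dependency is missing; code that actually needs these classes then fails at use time rather than import time. A generic sketch of the pattern (the package and function names here are illustrative, not from this diff):

```python
import logging

logger = logging.getLogger(__name__)

try:
    from some_optional_package import SomeClass  # hypothetical dependency
except ImportError as e:
    SomeClass = None  # module import still succeeds without the extra
    logger.warning(e)


def build_model():
    """Fail with a clear message only when the feature is actually used."""
    if SomeClass is None:
        raise ImportError("some_optional_package is required for build_model")
    return SomeClass()
```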