From 11f7cfc7ec82acb17264a689d7ebaa524584710c Mon Sep 17 00:00:00 2001 From: Samuel Bray Date: Fri, 6 Dec 2024 19:05:59 -0800 Subject: [PATCH] Cleanup IntervalList orphans in weekly job only (#1195) * cleanup interval orphans in nightly job only * update changelog * Update docs/src/ForDevelopers/Management.md Co-authored-by: Chris Broz * suggest less frequent running of IntervalList cleanup --------- Co-authored-by: Chris Broz --- CHANGELOG.md | 2 ++ docs/src/ForDevelopers/Management.md | 11 +++++++++-- src/spyglass/common/common_interval.py | 2 +- src/spyglass/utils/dj_mixin.py | 3 --- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 781f66fe9..6f3ce4cc5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,8 +44,10 @@ dj.FreeTable(dj.conn(), "common_session.session_group").drop() - Merge table delete removes orphaned master entries #1164 - Edit `merge_fetch` to expect positional before keyword arguments #1181 - Allow part restriction `SpyglassMixinPart.delete` #1192 +- Move cleanup of `IntervalList` orphan entries to cron job cleanup process #1195 - Add mixin method `get_fully_defined_key` #1198 + ### Pipelines - Common diff --git a/docs/src/ForDevelopers/Management.md b/docs/src/ForDevelopers/Management.md index 5c00d2688..df0caae81 100644 --- a/docs/src/ForDevelopers/Management.md +++ b/docs/src/ForDevelopers/Management.md @@ -228,10 +228,16 @@ disk. There are several tables that retain lists of files that have been generated during analyses. If someone deletes analysis entries, files will still be on disk. -To remove orphaned files, we run the following commands in our cron jobs: +Additionally, there are periphery tables such as `IntervalList` which are used +to store entries created by downstream tables. These entries are not +automatically deleted when the downstream entry is removed. To minimize interference +with ongoing user entry creation, we recommend running these cleanups on a less frequent +basis (e.g. weekly). + +To remove orphaned files and entries, we run the following commands in our cron jobs: ```python -from spyglass.common import AnalysisNwbfile +from spyglass.common import AnalysisNwbfile, IntervalList from spyglass.spikesorting import SpikeSorting from spyglass.common.common_nwbfile import schema as nwbfile_schema from spyglass.decoding.v1.sorted_spikes import schema as spikes_schema @@ -241,6 +247,7 @@ from spyglass.decoding.v1.clusterless import schema as clusterless_schema def main(): AnalysisNwbfile().nightly_cleanup() SpikeSorting().nightly_cleanup() + IntervalList().cleanup() nwbfile_schema.external['analysis'].delete(delete_external_files=True)) nwbfile_schema.external['raw'].delete(delete_external_files=True)) spikes_schema.external['analysis'].delete(delete_external_files=True)) diff --git a/src/spyglass/common/common_interval.py b/src/spyglass/common/common_interval.py index 25670f03c..2021c5f69 100644 --- a/src/spyglass/common/common_interval.py +++ b/src/spyglass/common/common_interval.py @@ -158,7 +158,7 @@ def plot_epoch_pos_raw_intervals(self, figsize=(20, 5), return_fig=False): if return_fig: return fig - def nightly_cleanup(self, dry_run=True): + def cleanup(self, dry_run=True): """Clean up orphaned IntervalList entries.""" orphans = self - get_child_tables(self) if dry_run: diff --git a/src/spyglass/utils/dj_mixin.py b/src/spyglass/utils/dj_mixin.py index 2df4844a4..3fcafb71d 100644 --- a/src/spyglass/utils/dj_mixin.py +++ b/src/spyglass/utils/dj_mixin.py @@ -507,9 +507,6 @@ def cautious_delete( delete_external_files=True, display_progress=False ) - if not self._test_mode: - _ = IntervalList().nightly_cleanup(dry_run=False) - def delete(self, *args, **kwargs): """Alias for cautious_delete, overwrites datajoint.table.Table.delete""" self.cautious_delete(*args, **kwargs)