Commit 0ef6625: Merge branch 'tickets/DM-45750'

kfindeisen committed Oct 23, 2024
2 parents: 93883d8 + 76a2d5a

Showing 33 changed files with 631 additions and 1,213 deletions.
README.md (2 changes: 1 addition & 1 deletion)

@@ -18,7 +18,7 @@ path | description
`config` | Dataset-specific configs to help the Science Pipelines work with this dataset.
`pipelines` | Dataset-specific pipelines to run on this dataset.
`dataIds.list` | List of dataIds for use in running Tasks. Currently set to run all Ids.
-`preloaded` | Starter Gen3 Butler repo containing a skymap, DECam Community Pipeline MasterCalibs from the 2015 HiTS campaign, deep g-band coadds for use as differencing templates, Gaia and PS1 reference catalogs in HTM format for regions overlapping any visit in the dataset, and a pretrained machine learning model for real/bogus classification.
+`preloaded` | Starter Gen3 Butler repo containing a skymap, DECam Community Pipeline MasterCalibs from the 2015 HiTS campaign, deep g-band coadds for use as differencing templates, Gaia and PS1 reference catalogs in HTM format for regions overlapping any visit in the dataset, mock APDB outputs based on the raw images, and a pretrained machine learning model for real/bogus classification.
`scripts` | Scripts and data for generating this dataset.

Git LFS
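Once the LSST Science Pipelines are set up, the preloaded repo described above can be inspected with the standard Butler command line; a minimal sketch (the dia_catalogs collection name comes from scripts/generate_all_gen3.sh later in this diff, the rest is stock Butler CLI):

butler query-dataset-types preloaded
butler query-datasets preloaded --collections dia_catalogs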
config/export.yaml (1,497 changes: 316 additions & 1,181 deletions; large diff not rendered)

doc/ap_verify_ci_hits2015/index.rst (1 change: 1 addition & 0 deletions)

@@ -30,6 +30,7 @@ It contains:
* biases (``zci``) and g-band flats (``fci``)
* reference catalogs for Gaia and Pan-STARRS1, covering the raw images' footprint.
* image differencing templates coadded from HiTS 2014 data, covering the raw images' footprint.
+* mock APDB catalogs based on processing the raw images in order.
* the rbResnet50-DC2 pretrained machine learning model for real/bogus classification

.. _ap_verify_ci_hits2015-contributing:
pipelines/Ephemerides.yaml (6 changes: 5 additions & 1 deletion)

@@ -1,4 +1,8 @@
description: Pipeline for downloading solar system ephemerides given raw images
instrument: lsst.obs.decam.DarkEnergyCamera
tasks:
-  SkyBotEphemerisQuery: lsst.ap.association.skyBotEphemerisQuery.SkyBotEphemerisQueryTask
+  getRegionTimeFromVisit:
+    class: lsst.pipe.tasks.getRegionTimeFromVisit.GetRegionTimeFromVisitTask
+    config:
+      connections.dummy_visit: visit_dummy
+  mpSkyEphemerisQuery: lsst.ap.association.MPSkyEphemerisQueryTask
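As a usage sketch (not part of this commit), a pipeline like this would normally be run with pipetask; the repo path here is illustrative, while the raw and sso/mpsky collection names follow scripts/generate_ephemerides_gen3.py later in this diff:

pipetask run -b /path/to/work_repo \
    -p pipelines/Ephemerides.yaml \
    -i raw -o sso/mpsky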
18 binary files changed (contents not shown).
preloaded/gen3.sqlite3 (2 changes: 1 addition & 1 deletion; Git LFS file, contents not shown)
7 binary files changed (contents not shown).
scripts/generate_all_gen3.sh (9 changes: 7 additions & 2 deletions)

@@ -117,12 +117,17 @@ python "${SCRIPT_DIR}/generate_ephemerides_gen3.py"

"${SCRIPT_DIR}/generate_fake_injection_catalog.sh" -b ${DATASET_REPO} -o ${INJECTION_CATALOG_COLLECTION}

+########################################
+# Generate self-consistent APDB data
+
+python "${SCRIPT_DIR}/generate_self_preload.py" # Must be run after all other ApPipe inputs available

########################################
# Final clean-up

butler collection-chain "${DATASET_REPO}" sso sso/cached
-butler collection-chain "${DATASET_REPO}" DECam/defaults templates/goodSeeing skymaps DECam/calib refcats sso \
-    models ${INJECTION_CATALOG_COLLECTION}
+butler collection-chain "${DATASET_REPO}" DECam/defaults templates/goodSeeing skymaps DECam/calib \
+    refcats sso dia_catalogs models ${INJECTION_CATALOG_COLLECTION}
python "${SCRIPT_DIR}/make_preloaded_export.py"

echo "Gen 3 preloaded repository complete."
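A quick sanity check after rebuilding the chains, sketched with the script's own ${DATASET_REPO} variable and assuming a Butler version that supports the --chains option, is to list the chain's children:

butler query-collections "${DATASET_REPO}" DECam/defaults --chains TREE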
scripts/generate_ephemerides_gen3.py (73 changes: 46 additions & 27 deletions)

@@ -25,6 +25,9 @@
Running this script allows for updates to the ephemerides to be incorporated
into the dataset.
+
+This script takes no command-line arguments; it infers everything it needs from
+the `preloaded/` repository.
"""

import glob
@@ -34,9 +37,11 @@
import sys
import tempfile

+import pandas

import lsst.log
import lsst.sphgeom
-from lsst.daf.butler import Butler, FileDataset
+from lsst.daf.butler import Butler, CollectionType, DatasetType
import lsst.obs.base


@@ -46,12 +51,14 @@

# Avoid explicit references to dataset package to maximize portability.
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
-PIPE_DIR = os.path.join(SCRIPT_DIR, "..", "pipelines")
-RAW_DIR = os.path.join(SCRIPT_DIR, "..", "raw")
+PIPE_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "pipelines"))
+RAW_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "raw"))
RAW_RUN = "raw"
-EPHEM_DATASET = "visitSsObjects"
-DEST_DIR = os.path.join(SCRIPT_DIR, "..", "preloaded")
-DEST_RUN = "sso/cached"
+VISIT_DATASET = "visit_dummy"
+EPHEM_DATASET = "preloaded_SsObjects"
+DEST_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "preloaded"))
+DEST_COLLECTION = "sso"
+DEST_RUN = DEST_COLLECTION + "/mpsky"


########################################
@@ -92,13 +99,14 @@ def _make_repo_with_instruments(repo_dir, instruments):
"""
config = Butler.makeRepo(repo_dir)
repo = Butler(config, writeable=True)
logging.debug("Temporary repo has universe version %d.", repo.dimensions.version)
for instrument in instruments:
instrument.register(repo.registry)
return repo


########################################
-# Ingest raws (needed for visitinfo)
+# Ingest raws (needed for visit records)

def _ingest_raws(repo, raw_dir, run):
"""Ingest this dataset's raws into a specific repo.
@@ -112,21 +120,41 @@ def _ingest_raws(repo, raw_dir, run):
run : `str`
The name of the run into which to import the raws.
"""
-raws = glob.glob(os.path.join(raw_dir, '**', '*.fits.fz'), recursive=True)
# explicit config workaround for DM-971
+raws = glob.glob(os.path.join(raw_dir, '**', '*.fits*'), recursive=True)
ingester = lsst.obs.base.RawIngestTask(butler=repo, config=lsst.obs.base.RawIngestConfig())
ingester.run(raws, run=run)
exposures = set(repo.registry.queryDataIds(["exposure"]))
# explicit config workaround for DM-971
definer = lsst.obs.base.DefineVisitsTask(butler=repo, config=lsst.obs.base.DefineVisitsConfig())
definer.run(exposures)


+########################################
+# Dummy pipeline inputs
+
+def _make_visit_datasets(repo, run):
+    """Create stub datasets for running GetRegionTimeFromVisitTask.
+
+    Parameters
+    ----------
+    repo : `lsst.daf.butler.Butler`
+        A writeable Butler in which to create datasets.
+    run : `str`
+        The name of the run into which to create datasets.
+    """
+    dummy_type = DatasetType(VISIT_DATASET, {"instrument", "visit", "detector"}, "DataFrame")
+    repo.registry.registerDatasetType(dummy_type)
+    # Exclude unused detectors
+    data_ids = {ref.dataId for ref in repo.query_datasets("raw", collections="*", find_first=False)}
+    exp_table = pandas.DataFrame()
+    for data_id in data_ids:
+        repo.put(exp_table, dummy_type, data_id, run=run)


########################################
# Download ephemerides

def _get_ephem(repo_dir, raw_collection, ephem_collection):
"""Run the task for downloading ephemerides.
"""Run the tasks for downloading ephemerides.
Parameters
----------
@@ -176,25 +204,12 @@ def _transfer_ephems(ephem_type, src_repo, src_dir, run, dest_repo):
dest_repo : `lsst.daf.butler.Butler`
The repository to which to copy the datasets.
"""
-# Need to transfer visit definitions as well; Butler.export is the easiest
+# Need to transfer group definitions as well; Butler.export is the easiest
# way to do this.
with tempfile.NamedTemporaryFile(suffix=".yaml") as export_file:
with src_repo.export(filename=export_file.name, transfer=None) as contents:
contents.saveDatasets(src_repo.registry.queryDatasets(ephem_type, collections=run),
elements=["visit"])
# Because of how the temp repo was constructed, there should not be
# any visit/exposure records other than those needed to support the
# ephemerides datasets.
contents.saveDimensionData("visit_system",
src_repo.registry.queryDimensionRecords("visit_system"))
contents.saveDimensionData("visit",
src_repo.registry.queryDimensionRecords("visit"))
contents.saveDimensionData("exposure",
src_repo.registry.queryDimensionRecords("exposure"))
contents.saveDimensionData("visit_definition",
src_repo.registry.queryDimensionRecords("visit_definition"))
contents.saveDimensionData("visit_detector_region",
src_repo.registry.queryDimensionRecords("visit_detector_region"))
elements=["group"])
# runs included automatically by saveDatasets
dest_repo.import_(directory=src_dir, filename=export_file.name, transfer="copy")

@@ -207,11 +222,15 @@ def _transfer_ephems(ephem_type, src_repo, src_dir, run, dest_repo):
temp_repo = _make_repo_with_instruments(workspace, _get_instruments(DEST_DIR))
logging.info("Ingesting raws...")
_ingest_raws(temp_repo, RAW_DIR, RAW_RUN)
+_make_visit_datasets(temp_repo, RAW_RUN)
logging.info("Downloading ephemerides...")
_get_ephem(workspace, RAW_RUN, DEST_RUN)
temp_repo.registry.refresh() # Pipeline added dataset types
preloaded = Butler(DEST_DIR, writeable=True)
logging.debug("Preloaded repo has universe version %d.", preloaded.dimensions.version)
logging.info("Transferring ephemerides to dataset...")
_transfer_ephems(EPHEM_DATASET, temp_repo, workspace, DEST_RUN, preloaded)
+preloaded.registry.registerCollection(DEST_COLLECTION, CollectionType.CHAINED)
+preloaded.registry.setCollectionChain(DEST_COLLECTION, [DEST_RUN])

logging.info("Solar system catalogs copied to %s:%s", DEST_DIR, DEST_RUN)
logging.info("Solar system catalogs copied to %s:%s", DEST_DIR, DEST_COLLECTION)
(Remaining changed files not rendered.)