DM-45750: Store new-style ephemerides and APDB catalogs in ap_verify data sets #61

Merged: 6 commits, Oct 23, 2024
Changes from all commits
README.md (1 addition, 1 deletion)

@@ -18,7 +18,7 @@ path | description
`config` | Dataset-specific configs to help the Science Pipelines work with this dataset.
`pipelines` | Dataset-specific pipelines to run on this dataset.
`dataIds.list` | List of dataIds for use in running Tasks. Currently set to run all Ids.
-`preloaded` | Starter Gen3 Butler repo containing a skymap, DECam Community Pipeline MasterCalibs from the 2015 HiTS campaign, deep g-band coadds for use as differencing templates, Gaia and PS1 reference catalogs in HTM format for regions overlapping any visit in the dataset, and a pretrained machine learning model for real/bogus classification.
+`preloaded` | Starter Gen3 Butler repo containing a skymap, DECam Community Pipeline MasterCalibs from the 2015 HiTS campaign, deep g-band coadds for use as differencing templates, Gaia and PS1 reference catalogs in HTM format for regions overlapping any visit in the dataset, mock APDB outputs based on the raw images, and a pretrained machine learning model for real/bogus classification.
`scripts` | Scripts and data for generating this dataset.

Git LFS
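Aside: a quick way to confirm what the updated starter repo contains is to open it with the Butler and list its collections. A minimal sketch, assuming it runs from the dataset root so that `preloaded/` resolves; it is not part of this change:

    from lsst.daf.butler import Butler

    # Open the preloaded starter repo read-only.
    butler = Butler("preloaded")

    # Print every collection, including the new dia_catalogs and sso chains.
    for name in sorted(butler.registry.queryCollections()):
        print(name)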
config/export.yaml (316 additions, 1,181 deletions; large diff not rendered)
doc/ap_verify_ci_hits2015/index.rst (1 addition)

@@ -30,6 +30,7 @@ It contains:
* biases (``zci``) and g-band flats (``fci``)
* reference catalogs for Gaia and Pan-STARRS1, covering the raw images' footprint.
* image differencing templates coadded from HiTS 2014 data, covering the raw images' footprint.
+* mock APDB catalogs based on processing the raw images in order.
* the rbResnet50-DC2 pretrained machine learning model for real/bogus classification

.. _ap_verify_ci_hits2015-contributing:
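Aside: the mock APDB catalogs live in the `dia_catalogs` collection that `generate_all_gen3.sh` chains into the defaults (see below). A hedged sketch of listing them; the `...` wildcard asks for all dataset types, because this diff does not spell out the preloaded APDB dataset type names:

    from lsst.daf.butler import Butler

    butler = Butler("preloaded")
    # findFirst=False is required for a wildcard dataset-type query.
    for ref in butler.registry.queryDatasets(..., collections="dia_catalogs", findFirst=False):
        print(ref.datasetType.name, ref.dataId)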
pipelines/Ephemerides.yaml (5 additions, 1 deletion)

@@ -1,4 +1,8 @@
 description: Pipeline for downloading solar system ephemerides given raw images
 instrument: lsst.obs.decam.DarkEnergyCamera
 tasks:
-  SkyBotEphemerisQuery: lsst.ap.association.skyBotEphemerisQuery.SkyBotEphemerisQueryTask
+  getRegionTimeFromVisit:
+    class: lsst.pipe.tasks.getRegionTimeFromVisit.GetRegionTimeFromVisitTask
+    config:
+      connections.dummy_visit: visit_dummy
+  mpSkyEphemerisQuery: lsst.ap.association.MPSkyEphemerisQueryTask
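Aside: the revised pipeline can also be driven programmatically. A rough sketch using `SimplePipelineExecutor` from `ctrl_mpexec`; the repo path and collection names here are illustrative assumptions, not taken from this change:

    from lsst.ctrl.mpexec import SimplePipelineExecutor

    # Assumed scratch repo whose "raw" run holds the ingested raws and visit_dummy stubs.
    butler = SimplePipelineExecutor.prep_butler("temp_repo", inputs=["raw"], output="sso/mpsky")
    executor = SimplePipelineExecutor.from_pipeline_filename("pipelines/Ephemerides.yaml", butler=butler)
    # register_dataset_types=True because preloaded_SsObjects may not exist in a fresh repo.
    executor.run(register_dataset_types=True)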
(18 binary files changed; contents not shown.)
preloaded/gen3.sqlite3 (1 addition, 1 deletion; Git LFS file not shown)
(7 binary files changed; contents not shown.)
scripts/generate_all_gen3.sh (7 additions, 2 deletions)

@@ -117,12 +117,17 @@ python "${SCRIPT_DIR}/generate_ephemerides_gen3.py"

"${SCRIPT_DIR}/generate_fake_injection_catalog.sh" -b ${DATASET_REPO} -o ${INJECTION_CATALOG_COLLECTION}

+########################################
+# Generate self-consistent APDB data
+
+python "${SCRIPT_DIR}/generate_self_preload.py"  # Must be run after all other ApPipe inputs available

########################################
# Final clean-up

butler collection-chain "${DATASET_REPO}" sso sso/cached
-butler collection-chain "${DATASET_REPO}" DECam/defaults templates/goodSeeing skymaps DECam/calib refcats sso \
-    models ${INJECTION_CATALOG_COLLECTION}
+butler collection-chain "${DATASET_REPO}" DECam/defaults templates/goodSeeing skymaps DECam/calib \
+    refcats sso dia_catalogs models ${INJECTION_CATALOG_COLLECTION}
python "${SCRIPT_DIR}/make_preloaded_export.py"

echo "Gen 3 preloaded repository complete."
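Aside: the `butler collection-chain` calls above have a direct registry equivalent, which is the same API the Python script below uses for the `sso` chain. A sketch mirroring the new default chain; the injection catalog collection is held in a script variable, so it is omitted here rather than guessed:

    from lsst.daf.butler import Butler, CollectionType

    butler = Butler("preloaded", writeable=True)
    butler.registry.registerCollection("DECam/defaults", CollectionType.CHAINED)
    # Same ordering as the script; the injection catalog collection would be appended last.
    butler.registry.setCollectionChain(
        "DECam/defaults",
        ["templates/goodSeeing", "skymaps", "DECam/calib", "refcats", "sso", "dia_catalogs", "models"],
    )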
scripts/generate_ephemerides_gen3.py (46 additions, 27 deletions)

@@ -25,6 +25,9 @@

Running this script allows for updates to the ephemerides to be incorporated
into the dataset.

+This script takes no command-line arguments; it infers everything it needs from
+the `preloaded/` repository.
"""

import glob
@@ -34,9 +34,11 @@
import sys
import tempfile

+import pandas

import lsst.log
import lsst.sphgeom
-from lsst.daf.butler import Butler, FileDataset
+from lsst.daf.butler import Butler, CollectionType, DatasetType
import lsst.obs.base


@@ -46,12 +46,14 @@

# Avoid explicit references to dataset package to maximize portability.
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
-PIPE_DIR = os.path.join(SCRIPT_DIR, "..", "pipelines")
-RAW_DIR = os.path.join(SCRIPT_DIR, "..", "raw")
+PIPE_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "pipelines"))
+RAW_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "raw"))
RAW_RUN = "raw"
-EPHEM_DATASET = "visitSsObjects"
-DEST_DIR = os.path.join(SCRIPT_DIR, "..", "preloaded")
-DEST_RUN = "sso/cached"
+VISIT_DATASET = "visit_dummy"
+EPHEM_DATASET = "preloaded_SsObjects"
+DEST_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "preloaded"))
+DEST_COLLECTION = "sso"
+DEST_RUN = DEST_COLLECTION + "/mpsky"


########################################
@@ -92,13 +92,14 @@ def _make_repo_with_instruments(repo_dir, instruments):
"""
config = Butler.makeRepo(repo_dir)
repo = Butler(config, writeable=True)
logging.debug("Temporary repo has universe version %d.", repo.dimensions.version)
for instrument in instruments:
instrument.register(repo.registry)
return repo


########################################
-# Ingest raws (needed for visitinfo)
+# Ingest raws (needed for visit records)

def _ingest_raws(repo, raw_dir, run):
"""Ingest this dataset's raws into a specific repo.
@@ -112,21 +112,41 @@
    run : `str`
        The name of the run into which to import the raws.
    """
-    raws = glob.glob(os.path.join(raw_dir, '**', '*.fits.fz'), recursive=True)
+    raws = glob.glob(os.path.join(raw_dir, '**', '*.fits*'), recursive=True)
    # explicit config workaround for DM-971
    ingester = lsst.obs.base.RawIngestTask(butler=repo, config=lsst.obs.base.RawIngestConfig())
    ingester.run(raws, run=run)
+    exposures = set(repo.registry.queryDataIds(["exposure"]))
+    # explicit config workaround for DM-971
+    definer = lsst.obs.base.DefineVisitsTask(butler=repo, config=lsst.obs.base.DefineVisitsConfig())
+    definer.run(exposures)


+########################################
+# Dummy pipeline inputs
+
+def _make_visit_datasets(repo, run):
+    """Create stub datasets for running GetRegionTimeFromVisitTask.
+
+    Parameters
+    ----------
+    repo : `lsst.daf.butler.Butler`
+        A writeable Butler in which to create datasets.
+    run : `str`
+        The name of the run in which to create the datasets.
+    """
+    dummy_type = DatasetType(VISIT_DATASET, {"instrument", "visit", "detector"}, "DataFrame",
+                             universe=repo.dimensions)
+    repo.registry.registerDatasetType(dummy_type)
+    # Exclude unused detectors by taking data IDs only from the ingested raws
+    data_ids = {ref.dataId for ref in repo.query_datasets("raw", collections="*", find_first=False)}
+    exp_table = pandas.DataFrame()
+    for data_id in data_ids:
+        repo.put(exp_table, dummy_type, data_id, run=run)


########################################
# Download ephemerides

def _get_ephem(repo_dir, raw_collection, ephem_collection):
"""Run the task for downloading ephemerides.
"""Run the tasks for downloading ephemerides.

Parameters
----------
@@ -176,25 +204,12 @@ def _transfer_ephems(ephem_type, src_repo, src_dir, run, dest_repo):
    dest_repo : `lsst.daf.butler.Butler`
        The repository to which to copy the datasets.
    """
-    # Need to transfer visit definitions as well; Butler.export is the easiest
+    # Need to transfer group definitions as well; Butler.export is the easiest
    # way to do this.
    with tempfile.NamedTemporaryFile(suffix=".yaml") as export_file:
        with src_repo.export(filename=export_file.name, transfer=None) as contents:
            contents.saveDatasets(src_repo.registry.queryDatasets(ephem_type, collections=run),
-                                  elements=["visit"])
-            # Because of how the temp repo was constructed, there should not be
-            # any visit/exposure records other than those needed to support the
-            # ephemerides datasets.
-            contents.saveDimensionData("visit_system",
-                                       src_repo.registry.queryDimensionRecords("visit_system"))
-            contents.saveDimensionData("visit",
-                                       src_repo.registry.queryDimensionRecords("visit"))
-            contents.saveDimensionData("exposure",
-                                       src_repo.registry.queryDimensionRecords("exposure"))
-            contents.saveDimensionData("visit_definition",
-                                       src_repo.registry.queryDimensionRecords("visit_definition"))
-            contents.saveDimensionData("visit_detector_region",
-                                       src_repo.registry.queryDimensionRecords("visit_detector_region"))
+                                  elements=["group"])
        # runs included automatically by saveDatasets
        dest_repo.import_(directory=src_dir, filename=export_file.name, transfer="copy")

@@ -207,11 +222,15 @@
    temp_repo = _make_repo_with_instruments(workspace, _get_instruments(DEST_DIR))
    logging.info("Ingesting raws...")
    _ingest_raws(temp_repo, RAW_DIR, RAW_RUN)
+    _make_visit_datasets(temp_repo, RAW_RUN)
    logging.info("Downloading ephemerides...")
    _get_ephem(workspace, RAW_RUN, DEST_RUN)
    temp_repo.registry.refresh()  # Pipeline added dataset types
    preloaded = Butler(DEST_DIR, writeable=True)
+    logging.debug("Preloaded repo has universe version %d.", preloaded.dimensions.version)
    logging.info("Transferring ephemerides to dataset...")
    _transfer_ephems(EPHEM_DATASET, temp_repo, workspace, DEST_RUN, preloaded)
+    preloaded.registry.registerCollection(DEST_COLLECTION, CollectionType.CHAINED)
+    preloaded.registry.setCollectionChain(DEST_COLLECTION, [DEST_RUN])

-logging.info("Solar system catalogs copied to %s:%s", DEST_DIR, DEST_RUN)
+logging.info("Solar system catalogs copied to %s:%s", DEST_DIR, DEST_COLLECTION)