Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-45750: Store new-style ephemerides and APDB catalogs in ap_verify data sets #49

Merged
merged 6 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ path | description
`config` | Dataset-specific configs to help Stack code work with this dataset.
`pipelines` | Dataset-specific pipelines to run on this dataset.
`dataIds.list` | List of dataIds in this repo. For use in running Tasks. Currently set to run all Ids.
`preloaded` | A Gen 3 Butler repository containing HSC master calibs from the 2016 COSMOS campaign (or, where necessary, from 2015), coadded images for use as differencing templates, PS1 reference catalog in HTM format for regions overlapping any visit in the dataset, and a pretrained machine learning model for real/bogus classification.
`preloaded` | A Gen 3 Butler repository containing HSC master calibs from the 2016 COSMOS campaign (or, where necessary, from 2015), coadded images for use as differencing templates, PS1 reference catalog in HTM format for regions overlapping any visit in the dataset, mock APDB outputs based on the raw images, and a pretrained machine learning model for real/bogus classification.
`scripts` | Scripts and data for generating this dataset.


Expand Down
9,511 changes: 3,709 additions & 5,802 deletions config/export.yaml

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions doc/ap_verify_ci_cosmos_pdr2/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ It contains:
* biases, darks, brighter-fatter kernels, and g-band flats.
* reference catalogs for Pan-STARRS1, covering the raw images' footprint.
* image differencing templates coadded from 2014 COSMOS data, covering the raw images' footprint.
* mock APDB catalogs, based on processing the raw images in order.
* the rbResnet50-DC2 pretrained machine learning model for real/bogus classification.

.. _ap_verify_ci_cosmos_pdr2-contributing:
Expand Down
6 changes: 5 additions & 1 deletion pipelines/Ephemerides.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
description: Pipeline for downloading solar system ephemerides given raw images
instrument: lsst.obs.subaru.HyperSuprimeCam
tasks:
  SkyBotEphemerisQuery: lsst.ap.association.skyBotEphemerisQuery.SkyBotEphemerisQueryTask
  getRegionTimeFromVisit:
    class: lsst.pipe.tasks.getRegionTimeFromVisit.GetRegionTimeFromVisitTask
    config:
      # Map the task's dummy input connection to the stub datasets created by
      # generate_ephemerides_gen3.py (VISIT_DATASET = "visit_dummy").
      # NOTE: pipeline YAML config overrides are mapping entries ("key: value"),
      # not Python "key = value" assignments.
      connections.dummy_visit: visit_dummy
  mpSkyEphemerisQuery: lsst.ap.association.MPSkyEphemerisQueryTask
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion preloaded/gen3.sqlite3
Git LFS file not shown
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
67 changes: 43 additions & 24 deletions scripts/generate_ephemerides_gen3.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@

Running this script allows for updates to the ephemerides to be incorporated
into the dataset.

This script takes no command-line arguments; it infers everything it needs from
the `preloaded/` repository.
"""

import glob
Expand All @@ -34,9 +37,11 @@
import sys
import tempfile

import pandas

import lsst.log
import lsst.sphgeom
from lsst.daf.butler import Butler, FileDataset
from lsst.daf.butler import Butler, CollectionType, DatasetType
import lsst.obs.base


Expand All @@ -46,12 +51,14 @@

# Avoid explicit references to dataset package to maximize portability.
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
PIPE_DIR = os.path.join(SCRIPT_DIR, "..", "pipelines")
RAW_DIR = os.path.join(SCRIPT_DIR, "..", "raw")
PIPE_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "pipelines"))
RAW_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "raw"))
RAW_RUN = "raw"
EPHEM_DATASET = "visitSsObjects"
DEST_DIR = os.path.join(SCRIPT_DIR, "..", "preloaded")
DEST_RUN = "sso/cached"
VISIT_DATASET = "visit_dummy"
EPHEM_DATASET = "preloaded_SsObjects"
DEST_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "..", "preloaded"))
DEST_COLLECTION = "sso"
DEST_RUN = DEST_COLLECTION + "/mpsky"


########################################
Expand Down Expand Up @@ -99,7 +106,7 @@ def _make_repo_with_instruments(repo_dir, instruments):


########################################
# Ingest raws (needed for visitinfo)
# Ingest raws (needed for visit records)

def _ingest_raws(repo, raw_dir, run):
"""Ingest this dataset's raws into a specific repo.
Expand All @@ -121,11 +128,33 @@ def _ingest_raws(repo, raw_dir, run):
definer.run(exposures)


########################################
# Dummy pipeline inputs

def _make_visit_datasets(repo, run):
    """Create stub datasets for running GetRegionTimeFromVisitTask.

    Parameters
    ----------
    repo : `lsst.daf.butler.Butler`
        A writeable Butler in which to create datasets.
    run : `str`
        The name of the run into which to create datasets.
    """
    dummy_type = DatasetType(VISIT_DATASET, {"instrument", "visit", "detector"}, "DataFrame")
    repo.registry.registerDatasetType(dummy_type)
    # Derive data IDs from the ingested raws (rather than enumerating all
    # detectors) so that detectors with no data in this dataset are excluded.
    data_ids = {ref.dataId for ref in repo.query_datasets("raw", collections="*", find_first=False)}
    # An empty DataFrame suffices: only the data ID, not the contents, is used
    # downstream — TODO confirm against GetRegionTimeFromVisitTask.
    stub_table = pandas.DataFrame()
    for data_id in data_ids:  # renamed from `id`, which shadowed the builtin
        repo.put(stub_table, dummy_type, data_id, run=run)


########################################
# Download ephemerides

def _get_ephem(repo_dir, raw_collection, ephem_collection):
"""Run the task for downloading ephemerides.
"""Run the tasks for downloading ephemerides.

Parameters
----------
Expand Down Expand Up @@ -175,25 +204,12 @@ def _transfer_ephems(ephem_type, src_repo, src_dir, run, dest_repo):
dest_repo : `lsst.daf.butler.Butler`
The repository to which to copy the datasets.
"""
# Need to transfer visit definitions as well; Butler.export is the easiest
# Need to transfer group definitions as well; Butler.export is the easiest
# way to do this.
with tempfile.NamedTemporaryFile(suffix=".yaml") as export_file:
with src_repo.export(filename=export_file.name, transfer=None) as contents:
contents.saveDatasets(src_repo.registry.queryDatasets(ephem_type, collections=run),
elements=["visit"])
# Because of how the temp repo was constructed, there should not be
# any visit/exposure records other than those needed to support the
# ephemerides datasets.
contents.saveDimensionData("visit_system",
src_repo.registry.queryDimensionRecords("visit_system"))
contents.saveDimensionData("visit",
src_repo.registry.queryDimensionRecords("visit"))
contents.saveDimensionData("exposure",
src_repo.registry.queryDimensionRecords("exposure"))
contents.saveDimensionData("visit_definition",
src_repo.registry.queryDimensionRecords("visit_definition"))
contents.saveDimensionData("visit_detector_region",
src_repo.registry.queryDimensionRecords("visit_detector_region"))
elements=["group"])
# runs included automatically by saveDatasets
dest_repo.import_(directory=src_dir, filename=export_file.name, transfer="copy")

Expand All @@ -206,12 +222,15 @@ def _transfer_ephems(ephem_type, src_repo, src_dir, run, dest_repo):
temp_repo = _make_repo_with_instruments(workspace, _get_instruments(DEST_DIR))
logging.info("Ingesting raws...")
_ingest_raws(temp_repo, RAW_DIR, RAW_RUN)
_make_visit_datasets(temp_repo, RAW_RUN)
logging.info("Downloading ephemerides...")
_get_ephem(workspace, RAW_RUN, DEST_RUN)
temp_repo.registry.refresh() # Pipeline added dataset types
preloaded = Butler(DEST_DIR, writeable=True)
logging.debug("Preloaded repo has universe version %d.", preloaded.dimensions.version)
logging.info("Transferring ephemerides to dataset...")
_transfer_ephems(EPHEM_DATASET, temp_repo, workspace, DEST_RUN, preloaded)
preloaded.registry.registerCollection(DEST_COLLECTION, CollectionType.CHAINED)
preloaded.registry.setCollectionChain(DEST_COLLECTION, [DEST_RUN])

logging.info("Solar system catalogs copied to %s:%s", DEST_DIR, DEST_RUN)
logging.info("Solar system catalogs copied to %s:%s", DEST_DIR, DEST_COLLECTION)
Loading
Loading