From 78d5f92ef6e677356fc0a6eefd6bfea30487c20b Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Thu, 25 Apr 2024 15:51:08 -0700 Subject: [PATCH 1/5] Update out-of-date Jira link. --- doc/lsst.ap.verify/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/lsst.ap.verify/index.rst b/doc/lsst.ap.verify/index.rst index 9c02c7df..d0a25397 100644 --- a/doc/lsst.ap.verify/index.rst +++ b/doc/lsst.ap.verify/index.rst @@ -32,7 +32,7 @@ Contributing ============ ``lsst.ap.verify`` is developed at https://github.com/lsst/ap_verify. -You can find Jira issues for this module under the `ap_verify `_ component. +You can find Jira issues for this module under the `ap_verify `_ component. .. _lsst.ap.verify-pyapi: From 19e2ab6af5b73aa2904862ae1be60e54081c24f8 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Thu, 25 Apr 2024 10:30:36 -0700 Subject: [PATCH 2/5] Add contracts enforcing consistent APDB use. ApdbMetricTasks should have their APDB configured in a way that's consistent with diaPipe, or they will break (possibly only at runtime). --- pipelines/DECam/ApVerify.yaml | 5 +++++ pipelines/DECam/ApVerifyCalibrate.yaml | 3 +++ pipelines/HSC/ApVerify.yaml | 5 +++++ pipelines/HSC/ApVerifyCalibrate.yaml | 5 +++++ pipelines/LSSTCam-imSim/ApVerify.yaml | 5 +++++ pipelines/LSSTCam-imSim/ApVerifyCalibrate.yaml | 3 +++ pipelines/_ingredients/ApVerify.yaml | 4 ++++ pipelines/_ingredients/ApVerifyCalibrate.yaml | 3 +++ 8 files changed, 33 insertions(+) diff --git a/pipelines/DECam/ApVerify.yaml b/pipelines/DECam/ApVerify.yaml index 98466769..fd83053f 100644 --- a/pipelines/DECam/ApVerify.yaml +++ b/pipelines/DECam/ApVerify.yaml @@ -16,3 +16,8 @@ tasks: config: doPackageAlerts: True alertPackager.doWriteAlerts: True +contracts: + # Contracts removed by excluding apPipe + - contract: diaPipe.doConfigureApdb or not totalUnassociatedDiaObjects.doReadMarker + msg: "totalUnassociatedDiaObjects.doReadMarker requires diaPipe.doConfigureApdb" + - (totalUnassociatedDiaObjects.doReadMarker) or (diaPipe.apdb_config_url == totalUnassociatedDiaObjects.apdb_config_url) diff --git a/pipelines/DECam/ApVerifyCalibrate.yaml b/pipelines/DECam/ApVerifyCalibrate.yaml index fa328e86..566debc6 100644 --- a/pipelines/DECam/ApVerifyCalibrate.yaml +++ b/pipelines/DECam/ApVerifyCalibrate.yaml @@ -18,6 +18,9 @@ tasks: contracts: # Must re-declare contracts that cross apPipe and metrics boundary, as # these were removed on import. + - contract: diaPipe.doConfigureApdb or not totalUnassociatedDiaObjects.doReadMarker + msg: "totalUnassociatedDiaObjects.doReadMarker requires diaPipe.doConfigureApdb" + - (totalUnassociatedDiaObjects.doReadMarker) or (diaPipe.apdb_config_url == totalUnassociatedDiaObjects.apdb_config_url) # Use of ConnectionsClass for templated fields is a workaround for DM-30210 - detectAndMeasure.connections.ConnectionsClass(config=detectAndMeasure).diaSources.name == fracDiaSourcesToSciSources.connections.ConnectionsClass(config=fracDiaSourcesToSciSources).diaSources.name diff --git a/pipelines/HSC/ApVerify.yaml b/pipelines/HSC/ApVerify.yaml index 9ff501bb..e41763d9 100644 --- a/pipelines/HSC/ApVerify.yaml +++ b/pipelines/HSC/ApVerify.yaml @@ -16,3 +16,8 @@ tasks: config: doPackageAlerts: True alertPackager.doWriteAlerts: True +contracts: + # Contracts removed by excluding apPipe + - contract: diaPipe.doConfigureApdb or not totalUnassociatedDiaObjects.doReadMarker + msg: "totalUnassociatedDiaObjects.doReadMarker requires diaPipe.doConfigureApdb" + - (totalUnassociatedDiaObjects.doReadMarker) or (diaPipe.apdb_config_url == totalUnassociatedDiaObjects.apdb_config_url) diff --git a/pipelines/HSC/ApVerifyCalibrate.yaml b/pipelines/HSC/ApVerifyCalibrate.yaml index 25f9ca15..f7a25936 100644 --- a/pipelines/HSC/ApVerifyCalibrate.yaml +++ b/pipelines/HSC/ApVerifyCalibrate.yaml @@ -15,3 +15,8 @@ tasks: class: lsst.ap.association.DiaPipelineTask config: doPackageAlerts: True +contracts: + # Contracts removed by excluding apPipe + - contract: diaPipe.doConfigureApdb or not totalUnassociatedDiaObjects.doReadMarker + msg: "totalUnassociatedDiaObjects.doReadMarker requires diaPipe.doConfigureApdb" + - (totalUnassociatedDiaObjects.doReadMarker) or (diaPipe.apdb_config_url == totalUnassociatedDiaObjects.apdb_config_url) diff --git a/pipelines/LSSTCam-imSim/ApVerify.yaml b/pipelines/LSSTCam-imSim/ApVerify.yaml index 6ab67390..c683e2ba 100644 --- a/pipelines/LSSTCam-imSim/ApVerify.yaml +++ b/pipelines/LSSTCam-imSim/ApVerify.yaml @@ -16,3 +16,8 @@ tasks: config: doPackageAlerts: True alertPackager.doWriteAlerts: True +contracts: + # Contracts removed by excluding apPipe + - contract: diaPipe.doConfigureApdb or not totalUnassociatedDiaObjects.doReadMarker + msg: "totalUnassociatedDiaObjects.doReadMarker requires diaPipe.doConfigureApdb" + - (totalUnassociatedDiaObjects.doReadMarker) or (diaPipe.apdb_config_url == totalUnassociatedDiaObjects.apdb_config_url) diff --git a/pipelines/LSSTCam-imSim/ApVerifyCalibrate.yaml b/pipelines/LSSTCam-imSim/ApVerifyCalibrate.yaml index 43578aa5..72c8788c 100644 --- a/pipelines/LSSTCam-imSim/ApVerifyCalibrate.yaml +++ b/pipelines/LSSTCam-imSim/ApVerifyCalibrate.yaml @@ -18,6 +18,9 @@ tasks: contracts: # Must re-declare contracts that cross apPipe and metrics boundary, as # these were removed on import. + - contract: diaPipe.doConfigureApdb or not totalUnassociatedDiaObjects.doReadMarker + msg: "totalUnassociatedDiaObjects.doReadMarker requires diaPipe.doConfigureApdb" + - (totalUnassociatedDiaObjects.doReadMarker) or (diaPipe.apdb_config_url == totalUnassociatedDiaObjects.apdb_config_url) # Use of ConnectionsClass for templated fields is a workaround for DM-30210 - detectAndMeasure.connections.ConnectionsClass(config=detectAndMeasure).diaSources.name == fracDiaSourcesToSciSources.connections.ConnectionsClass(config=fracDiaSourcesToSciSources).diaSources.name diff --git a/pipelines/_ingredients/ApVerify.yaml b/pipelines/_ingredients/ApVerify.yaml index 5230a794..4e908403 100644 --- a/pipelines/_ingredients/ApVerify.yaml +++ b/pipelines/_ingredients/ApVerify.yaml @@ -19,3 +19,7 @@ tasks: # we have an alternative. doPackageAlerts: True alertPackager.doWriteAlerts: True +contracts: + - contract: diaPipe.doConfigureApdb or not totalUnassociatedDiaObjects.doReadMarker + msg: "totalUnassociatedDiaObjects.doReadMarker requires diaPipe.doConfigureApdb" + - (totalUnassociatedDiaObjects.doReadMarker) or (diaPipe.apdb_config_url == totalUnassociatedDiaObjects.apdb_config_url) diff --git a/pipelines/_ingredients/ApVerifyCalibrate.yaml b/pipelines/_ingredients/ApVerifyCalibrate.yaml index 9cd9c5be..06626d3b 100644 --- a/pipelines/_ingredients/ApVerifyCalibrate.yaml +++ b/pipelines/_ingredients/ApVerifyCalibrate.yaml @@ -19,6 +19,9 @@ tasks: # we have an alternative. doPackageAlerts: True contracts: + - contract: diaPipe.doConfigureApdb or not totalUnassociatedDiaObjects.doReadMarker + msg: "totalUnassociatedDiaObjects.doReadMarker requires diaPipe.doConfigureApdb" + - (totalUnassociatedDiaObjects.doReadMarker) or (diaPipe.apdb_config_url == totalUnassociatedDiaObjects.apdb_config_url) # Metric inputs must match pipeline outputs # Use of ConnectionsClass for templated fields is a workaround for DM-30210 - detectAndMeasure.connections.ConnectionsClass(config=detectAndMeasure).diaSources.name == From 3b984d4af69e32d43e57aa283ba16ea527ba304d Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Thu, 25 Apr 2024 13:09:05 -0700 Subject: [PATCH 3/5] Use external APDB configs in pipelines/tasks. None of the tests actually try to load an APDB, so there's no need to define apdb_config_url for those components. --- pipelines/_ingredients/MetricsMisc.yaml | 3 +++ pipelines/_ingredients/MetricsMiscCalibrate.yaml | 3 +++ tests/MockApPipe.yaml | 3 +++ tests/test_testPipeline.py | 3 ++- 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pipelines/_ingredients/MetricsMisc.yaml b/pipelines/_ingredients/MetricsMisc.yaml index 49e052f7..d5862410 100644 --- a/pipelines/_ingredients/MetricsMisc.yaml +++ b/pipelines/_ingredients/MetricsMisc.yaml @@ -25,3 +25,6 @@ tasks: connections.labelName: diaPipe totalUnassociatedDiaObjects: class: lsst.ap.association.metrics.TotalUnassociatedDiaObjectsMetricTask + config: + doReadMarker: False # Impossible if diaPipe uses new-style config + apdb_config_url: parameters.apdb_config diff --git a/pipelines/_ingredients/MetricsMiscCalibrate.yaml b/pipelines/_ingredients/MetricsMiscCalibrate.yaml index d88e6ffa..807c6b2b 100644 --- a/pipelines/_ingredients/MetricsMiscCalibrate.yaml +++ b/pipelines/_ingredients/MetricsMiscCalibrate.yaml @@ -26,6 +26,9 @@ tasks: connections.labelName: diaPipe totalUnassociatedDiaObjects: class: lsst.ap.association.metrics.TotalUnassociatedDiaObjectsMetricTask + config: + doReadMarker: False # Impossible if diaPipe uses new-style config + apdb_config_url: parameters.apdb_config numSciSources: class: lsst.ip.diffim.metrics.NumberSciSourcesMetricTask fracDiaSourcesToSciSources: diff --git a/tests/MockApPipe.yaml b/tests/MockApPipe.yaml index d6e7bd07..fe3d2649 100644 --- a/tests/MockApPipe.yaml +++ b/tests/MockApPipe.yaml @@ -5,6 +5,7 @@ parameters: coaddName: goodSeeing # only refcat in ap_verify_testdata refcat: gaia + apdb_config: dummy_path.yaml tasks: isr: class: lsst.ap.verify.testPipeline.MockIsrTask @@ -54,6 +55,8 @@ tasks: class: lsst.ap.verify.testPipeline.MockDiaPipelineTask config: doWriteAssociatedSources: True + doConfigureApdb: False + apdb_config_url: parameters.apdb_config connections.coaddName: parameters.coaddName contracts: # Inputs and outputs must match diff --git a/tests/test_testPipeline.py b/tests/test_testPipeline.py index 290b94cc..ccdbbe81 100644 --- a/tests/test_testPipeline.py +++ b/tests/test_testPipeline.py @@ -337,7 +337,8 @@ def testMockTransformDiaSourceCatalogTask(self): def testMockDiaPipelineTask(self): config = MockDiaPipelineTask.ConfigClass() - config.apdb.db_url = "testing_only" + config.doConfigureApdb = False + config.apdb_config_url = "testing_only" task = MockDiaPipelineTask(config=config) pipelineTests.assertValidInitOutput(task) result = task.run(pandas.DataFrame(), pandas.DataFrame(), afwImage.ExposureF(), From ffe20f2b2019edbda10420c81ba31b61e0fa7fa5 Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Thu, 25 Apr 2024 14:49:06 -0700 Subject: [PATCH 4/5] Add "database config" field to Workspace. This field specifies where to store the config created by ap_verify. --- python/lsst/ap/verify/workspace.py | 13 +++++++++++++ tests/test_workspace.py | 9 +++++++++ 2 files changed, 22 insertions(+) diff --git a/python/lsst/ap/verify/workspace.py b/python/lsst/ap/verify/workspace.py index b22a33df..4797007f 100644 --- a/python/lsst/ap/verify/workspace.py +++ b/python/lsst/ap/verify/workspace.py @@ -114,6 +114,15 @@ def dbLocation(self): Shall be a pathname to a database suitable for the backend of `Apdb`. """ + @property + @abc.abstractmethod + def dbConfigLocation(self): + """The absolute location of the config file for the source association + database to be created or updated by the pipeline (`str`, read-only). + + The location is assumed to be a Python (`lsst.pex.config.Config`) file. + """ + @property @abc.abstractmethod def alertLocation(self): @@ -189,6 +198,10 @@ def pipelineDir(self): def dbLocation(self): return os.path.join(self._location, 'association.db') + @property + def dbConfigLocation(self): + return os.path.join(self._location, 'apdb.py') + @property def alertLocation(self): return os.path.join(self._location, 'alerts') diff --git a/tests/test_workspace.py b/tests/test_workspace.py index 12149f90..cea1312c 100644 --- a/tests/test_workspace.py +++ b/tests/test_workspace.py @@ -106,6 +106,15 @@ def testDatabase(self): # Workspace spec allows these to be URIs or paths, whatever the Butler accepts self._assertNotInDir(self._testbed.dbLocation, url2pathname(self._testbed.repo)) + def testDbConfig(self): + """Verify that a WorkspaceGen3 requests a database config in the target + directory, but not in any repository. + """ + root = self._testWorkspace + self._assertInDir(self._testbed.dbConfigLocation, root) + # Workspace spec allows these to be URIs or paths, whatever the Butler accepts + self._assertNotInDir(self._testbed.dbConfigLocation, url2pathname(self._testbed.repo)) + def testAlerts(self): """Verify that a WorkspaceGen3 requests an alert dump in the target directory, but not in any repository. From 124dfae146a0a69a58a2aa2d2a3c48370aae7dea Mon Sep 17 00:00:00 2001 From: Krzysztof Findeisen Date: Thu, 25 Apr 2024 15:19:46 -0700 Subject: [PATCH 5/5] Use new-style configs inside ap_verify. This change removes ap_verify's dependency on the deprecated makeApdb function. Although the new configs allow better support for Cassandra APDBs, ap_verify itself only supports SQL (particularly SQLite), given its restricted scope. --- python/lsst/ap/verify/pipeline_driver.py | 43 +++++++++++++++--------- tests/test_driver.py | 36 +++++++------------- 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/python/lsst/ap/verify/pipeline_driver.py b/python/lsst/ap/verify/pipeline_driver.py index 2925a5de..37c6c0d5 100644 --- a/python/lsst/ap/verify/pipeline_driver.py +++ b/python/lsst/ap/verify/pipeline_driver.py @@ -36,8 +36,7 @@ import logging import lsst.ctrl.mpexec.execFixupDataId # not part of lsst.ctrl.mpexec -import lsst.ctrl.mpexec.cli.pipetask -from lsst.ap.pipe.make_apdb import makeApdb +import lsst.dax.apdb as daxApdb _LOG = logging.getLogger(__name__) @@ -60,7 +59,7 @@ def __init__(self): help="A custom version of the ap_verify pipeline (e.g., with different metrics). " "Defaults to the ApVerify.yaml within --dataset.") self.add_argument("--db", "--db_url", default=None, - help="A location for the AP database, formatted as if for ApdbConfig.db_url. " + help="A location for the AP database, formatted as if for apdb-cli create-sql. " "Defaults to an SQLite file in the --output directory.") self.add_argument("--skip-pipeline", action="store_true", help="Do not run the AP pipeline itself. This argument is useful " @@ -91,7 +90,7 @@ def runApPipeGen3(workspace, parsedCmdLine, processes=1): """ log = _LOG.getChild('runApPipeGen3') - makeApdb(_getApdbArguments(workspace, parsedCmdLine)) + _makeApdb(workspace, _getApdbArguments(workspace, parsedCmdLine)) pipelineFile = _getPipelineFile(workspace, parsedCmdLine) pipelineArgs = ["pipetask", "--long-log", "run", @@ -197,8 +196,8 @@ def _getPipelineFile(workspace, parsed): def _getApdbArguments(workspace, parsed): - """Return the config options for running make_apdb.py on this workspace, - as command-line arguments. + """Return the arguments for running apdb-cli create-sql on this workspace, + as key-value pairs. Parameters ---------- @@ -210,14 +209,14 @@ def _getApdbArguments(workspace, parsed): Returns ------- - args : `list` of `str` - Command-line arguments calling ``--config`` or ``--config-file``, - following the conventions of `sys.argv`. + args : mapping [`str`] + Arguments to `lsst.dax.apdb.sql.Apdb.init_database`. """ if not parsed.db: parsed.db = "sqlite:///" + workspace.dbLocation - args = ["--config", "db_url=" + parsed.db] + args = {"db_url": parsed.db, + } return args @@ -239,14 +238,12 @@ def _getConfigArgumentsGen3(workspace, parsed): Command-line arguments calling ``--config`` or ``--config-file``, following the conventions of `sys.argv`. """ - # Translate APDB-only arguments to work as a sub-config - args = [("diaPipe:apdb." + arg if arg != "--config" else arg) - for arg in _getApdbArguments(workspace, parsed)] - args.extend([ + return [ + # APDB config should have been stored in the workspace. + "--config", "parameters:apdb_config=" + workspace.dbConfigLocation, # Put output alerts into the workspace. "--config", "diaPipe:alertPackager.alertWriteLocation=" + workspace.alertLocation, - ]) - return args + ] def _getCollectionArguments(workspace, reuse): @@ -284,3 +281,17 @@ def _getCollectionArguments(workspace, reuse): if reuse and oldRuns: args.extend(["--extend-run", "--skip-existing"]) return args + + +def _makeApdb(workspace, args): + """Create an APDB and store its config for future use. + + Parameters + ---------- + workspace : `lsst.ap.verify.workspace.Workspace` + A Workspace in which to store the database config. + args : mapping [`str`] + Arguments to `lsst.dax.apdb.sql.Apdb.init_database`. + """ + config = daxApdb.ApdbSql.init_database(**args) + config.save(workspace.dbConfigLocation) diff --git a/tests/test_driver.py b/tests/test_driver.py index 04fc6a24..758b89a9 100644 --- a/tests/test_driver.py +++ b/tests/test_driver.py @@ -46,7 +46,7 @@ def patchApPipeGen3(method): """ @functools.wraps(method) def wrapper(self, *args, **kwargs): - dbPatcher = unittest.mock.patch("lsst.ap.verify.pipeline_driver.makeApdb") + dbPatcher = unittest.mock.patch("lsst.ap.verify.pipeline_driver._makeApdb") patchedMethod = dbPatcher(method) return patchedMethod(self, *args, **kwargs) return wrapper @@ -93,13 +93,13 @@ def testrunApPipeGen3Steps(self): self.assertTrue(self.workspace.analysisButler.exists("apdb_marker", id)) self.assertTrue(self.workspace.analysisButler.exists("goodSeeingDiff_assocDiaSrc", id)) - def _getCmdLineArgs(self, parseAndRunArgs): - if parseAndRunArgs[0]: - return parseAndRunArgs[0][0] - elif "args" in parseAndRunArgs[1]: - return parseAndRunArgs[1]["args"] + def _getArgs(self, call_args): + if call_args.args: + return call_args.args[1] + elif "args" in call_args.kwargs: + return call_args.kwargs["args"] else: - self.fail("No command-line args passed to parseAndRun!") + self.fail(f"No APDB args passed to {call_args}!") @patchApPipeGen3 def testrunApPipeGen3WorkspaceDb(self, mockDb): @@ -107,15 +107,10 @@ def testrunApPipeGen3WorkspaceDb(self, mockDb): """ pipeline_driver.runApPipeGen3(self.workspace, self.apPipeArgs) - # Test the call to make_apdb.py mockDb.assert_called_once() - cmdLineArgs = self._getCmdLineArgs(mockDb.call_args) - self.assertIn("db_url=sqlite:///" + self.workspace.dbLocation, cmdLineArgs) - - # Test the call to the AP pipeline - id = _getDataIds(self.workspace.analysisButler)[0] - apdbConfig = self.workspace.analysisButler.get("apdb_marker", id) - self.assertEqual(apdbConfig.db_url, "sqlite:///" + self.workspace.dbLocation) + dbArgs = self._getArgs(mockDb.call_args) + self.assertIn("db_url", dbArgs) + self.assertEqual(dbArgs["db_url"], "sqlite:///" + self.workspace.dbLocation) @patchApPipeGen3 def testrunApPipeGen3WorkspaceCustom(self, mockDb): @@ -124,15 +119,10 @@ def testrunApPipeGen3WorkspaceCustom(self, mockDb): self.apPipeArgs.db = "postgresql://somebody@pgdb.misc.org/custom_db" pipeline_driver.runApPipeGen3(self.workspace, self.apPipeArgs) - # Test the call to make_apdb.py mockDb.assert_called_once() - cmdLineArgs = self._getCmdLineArgs(mockDb.call_args) - self.assertIn("db_url=" + self.apPipeArgs.db, cmdLineArgs) - - # Test the call to the AP pipeline - id = _getDataIds(self.workspace.analysisButler)[0] - apdbConfig = self.workspace.analysisButler.get("apdb_marker", id) - self.assertEqual(apdbConfig.db_url, self.apPipeArgs.db) + dbArgs = self._getArgs(mockDb.call_args) + self.assertIn("db_url", dbArgs) + self.assertEqual(dbArgs["db_url"], self.apPipeArgs.db) def testrunApPipeGen3Reuse(self): """Test that runApPipeGen3 does not run the pipeline at all (not even with