lsst · dhirving · Oct 15, 2024 · Oct 14, 2024 · timj · Oct 15, 2024
diff --git a/python/lsst/ap/verify/ingestion.py b/python/lsst/ap/verify/ingestion.py
@@ -31,7 +31,6 @@
 
 import fnmatch
 import os
-import re
 import shutil
 from glob import glob
 import logging
@@ -138,9 +137,12 @@ def _ensureRaws(self, processes):
         RuntimeError
             Raised if there are no files to ingest.
         """
-        # TODO: regex is workaround for DM-25945
-        rawCollectionFilter = re.compile(self.dataset.instrument.makeDefaultRawIngestRunName())
-        rawCollections = list(self.workspace.workButler.registry.queryCollections(rawCollectionFilter))
+        try:
+            collectionName = self.dataset.instrument.makeDefaultRawIngestRunName()
+            rawCollections = list(self.workspace.workButler.registry.queryCollections(collectionName))
+        except lsst.daf.butler.MissingCollectionError:
+            rawCollections = []
+
         rawData = list(self.workspace.workButler.registry.queryDatasets(
             'raw',
             collections=rawCollections,

diff --git a/python/lsst/ap/verify/pipeline_driver.py b/python/lsst/ap/verify/pipeline_driver.py
@@ -277,7 +277,10 @@ def _getCollectionArguments(workspace, reuse):
     # skip-existing-in would work around that, but would lead to a worse bug in
     # the case that the user is alternating runs with and without --clean-run.
     # registry.refresh()
-    oldRuns = list(registry.queryCollections(re.compile(workspace.outputName + r"/\d+T\d+Z")))
+    collectionPattern = re.compile(workspace.outputName + r"/\d+T\d+Z")
+    oldRuns = list(registry.queryCollections(workspace.outputName + "/*"))
+    oldRuns = [run for run in oldRuns if collectionPattern.fullmatch(run)]
+
     if reuse and oldRuns:
         args.extend(["--extend-run", "--skip-existing"])
     return args

diff --git a/python/lsst/ap/verify/workspace.py b/python/lsst/ap/verify/workspace.py
@@ -26,7 +26,6 @@
 import abc
 import os
 import pathlib
-import re
 import stat
 
 import lsst.daf.butler as dafButler
@@ -219,10 +218,31 @@ def _ensureCollection(self, registry, name, collectionType):
             The type of collection to add. This field is ignored when
             testing if a collection exists.
         """
-        matchingCollections = list(registry.queryCollections(re.compile(name)))
-        if not matchingCollections:
+        if not self._doesCollectionExist(registry, name):
             registry.registerCollection(name, type=collectionType)
 
+    def _doesCollectionExist(self, registry, name):
+        """Check if a collection exists in the registry.
+
+        Parameters
+        ----------
+        registry : `lsst.daf.butler.Registry`
+            The repository that may contain the collection.
+        name : `str`
+            The name of the collection to check for existence.
+
+        Returns
+        -------
+        exists : `bool`
+            `True` if the collection exists in the registry, `False` otherwise.
+
+        """
+        try:
+            matchingCollections = list(registry.queryCollections(name))
+            return len(matchingCollections) > 0
+        except dafButler.MissingCollectionError:
+            return False
+
     @property
     def workButler(self):
         """A Butler that can read and write to a Gen 3 repository (`lsst.daf.butler.Butler`, read-only).
@@ -247,8 +267,7 @@ def workButler(self):
 
                 # Create an output chain here, so that workButler can see it.
                 # Definition does not conflict with what pipetask --output uses.
-                # Regex is workaround for DM-25945.
-                if not list(queryButler.registry.queryCollections(re.compile(self.outputName))):
+                if not self._doesCollectionExist(queryButler.registry, self.outputName):
                     queryButler.registry.registerCollection(self.outputName,
                                                             dafButler.CollectionType.CHAINED)
                     queryButler.registry.setCollectionChain(self.outputName, inputs)

diff --git a/tests/test_ingestion.py b/tests/test_ingestion.py
@@ -23,7 +23,6 @@
 
 import os
 import pickle
-import re
 import shutil
 import tempfile
 import unittest
@@ -133,9 +132,7 @@ def testCalibIngestDriver(self):
         # queryDatasets cannot (yet) search CALIBRATION collections, so we
         # instead search the RUN-type collections that calibrations are
         # ingested into first before being associated with a validity range.
-        calibrationRunPattern = re.compile(
-            re.escape(self.dataset.instrument.makeCollectionName("calib") + "/") + ".+"
-        )
+        calibrationRunPattern = self.dataset.instrument.makeCollectionName("calib") + "/*"
         calibrationRuns = list(
             self.butler.registry.queryCollections(
                 calibrationRunPattern,