From 951b86160bf3e43289a5db2781cae87f735ca187 Mon Sep 17 00:00:00 2001 From: Andy Salnikov Date: Wed, 4 Oct 2023 11:46:31 -0700 Subject: [PATCH] Make datastore records private attribute of DatasetRef. Jim suggested that we use a new `OpaqueRecordSet` class to keep datastore records, but after discussion we figured it would require lots of additional structure to support it. For now there is no clear need for that class and we can continue using `StoredDatastoreItemInfo` in DatasetRefs, but we want to make records private to give us freedom to change it later. --- python/lsst/daf/butler/_butler.py | 4 ++-- python/lsst/daf/butler/_dataset_ref.py | 18 +++++++++--------- .../daf/butler/datastores/fileDatastore.py | 8 ++++---- python/lsst/daf/butler/registries/sql.py | 4 ++-- .../daf/butler/registry/interfaces/_bridge.py | 2 +- tests/test_datasets.py | 10 ++++++++-- 6 files changed, 26 insertions(+), 20 deletions(-) diff --git a/python/lsst/daf/butler/_butler.py b/python/lsst/daf/butler/_butler.py index ca76a73f42..31a35fcf60 100644 --- a/python/lsst/daf/butler/_butler.py +++ b/python/lsst/daf/butler/_butler.py @@ -1061,7 +1061,7 @@ def _findDatasetRef( if collections is not None: warnings.warn("Collections should not be specified with DatasetRef", stacklevel=3) # May need to retrieve datastore records if requested. - if datastore_records and datasetRefOrType.datastore_records is None: + if datastore_records and datasetRefOrType._datastore_records is None: datasetRefOrType = self._registry.get_datastore_records(datasetRefOrType) return datasetRefOrType timespan: Timespan | None = None @@ -1117,7 +1117,7 @@ def _findDatasetRef( # using the user definition such that the expected type is # returned. 
ref = DatasetRef( - datasetType, ref.dataId, run=ref.run, id=ref.id, datastore_records=ref.datastore_records + datasetType, ref.dataId, run=ref.run, id=ref.id, datastore_records=ref._datastore_records ) return ref diff --git a/python/lsst/daf/butler/_dataset_ref.py b/python/lsst/daf/butler/_dataset_ref.py index 3c25334f56..5b247d9186 100644 --- a/python/lsst/daf/butler/_dataset_ref.py +++ b/python/lsst/daf/butler/_dataset_ref.py @@ -320,7 +320,7 @@ class DatasetRef: "datasetType", "dataId", "run", - "datastore_records", + "_datastore_records", ) def __init__( @@ -348,7 +348,7 @@ def __init__( .makeDatasetId(self.run, self.datasetType, self.dataId, id_generation_mode) .int ) - self.datastore_records = datastore_records + self._datastore_records = datastore_records @property def id(self) -> DatasetId: @@ -431,9 +431,9 @@ def to_simple(self, minimal: bool = False) -> SerializedDatasetRef: return SerializedDatasetRef(**simple) datastore_records: Mapping[str, _DatastoreRecords] | None = None - if self.datastore_records is not None: + if self._datastore_records is not None: datastore_records = {} - for opaque_name, records in self.datastore_records.items(): + for opaque_name, records in self._datastore_records.items(): class_name, record_dicts = StoredDatastoreItemInfo.to_records(records) datastore_records[opaque_name] = class_name, list(record_dicts) @@ -578,7 +578,7 @@ def _unpickle( def __reduce__(self) -> tuple: return ( self._unpickle, - (self.datasetType, self.dataId, self.id, self.run, self.datastore_records), + (self.datasetType, self.dataId, self.id, self.run, self._datastore_records), ) def __deepcopy__(self, memo: dict) -> DatasetRef: @@ -607,7 +607,7 @@ def expanded(self, dataId: DataCoordinate) -> DatasetRef: id=self.id, run=self.run, conform=False, - datastore_records=self.datastore_records, + datastore_records=self._datastore_records, ) def isComponent(self) -> bool: @@ -722,7 +722,7 @@ def makeCompositeRef(self) -> DatasetRef: id=self.id, run=self.run, 
conform=False, - datastore_records=self.datastore_records, + datastore_records=self._datastore_records, ) def makeComponentRef(self, name: str) -> DatasetRef: @@ -748,7 +748,7 @@ def makeComponentRef(self, name: str) -> DatasetRef: id=self.id, run=self.run, conform=False, - datastore_records=self.datastore_records, + datastore_records=self._datastore_records, ) def overrideStorageClass(self, storageClass: str | StorageClass) -> DatasetRef: @@ -799,7 +799,7 @@ def replace( A new dataset reference with updated attributes. """ if datastore_records is False: - datastore_records = self.datastore_records + datastore_records = self._datastore_records if storage_class is None: datasetType = self.datasetType else: diff --git a/python/lsst/daf/butler/datastores/fileDatastore.py b/python/lsst/daf/butler/datastores/fileDatastore.py index fce89aa00a..58a6dc1a4e 100644 --- a/python/lsst/daf/butler/datastores/fileDatastore.py +++ b/python/lsst/daf/butler/datastores/fileDatastore.py @@ -428,8 +428,8 @@ def getStoredItemsInfo(self, ref: DatasetIdRef) -> list[StoredFileInfo]: list if no matching datasets can be found. """ # Try to get them from the ref first. - if ref.datastore_records is not None: - if (ref_records := ref.datastore_records.get(self._table.name)) is not None: + if ref._datastore_records is not None: + if (ref_records := ref._datastore_records.get(self._table.name)) is not None: # Need to make sure they have correct type. 
for record in ref_records: if not isinstance(record, StoredFileInfo): @@ -499,10 +499,10 @@ def _get_stored_records_associated_with_refs( records_by_ref: defaultdict[DatasetId, list[StoredFileInfo]] = defaultdict(list) refs_with_no_records = [] for ref in refs: - if ref.datastore_records is None: + if ref._datastore_records is None: refs_with_no_records.append(ref) else: - if (ref_records := ref.datastore_records.get(self._table.name)) is not None: + if (ref_records := ref._datastore_records.get(self._table.name)) is not None: # Need to make sure they have correct type. for ref_record in ref_records: if not isinstance(ref_record, StoredFileInfo): diff --git a/python/lsst/daf/butler/registries/sql.py b/python/lsst/daf/butler/registries/sql.py index b1a56345b9..6545ba1eb2 100644 --- a/python/lsst/daf/butler/registries/sql.py +++ b/python/lsst/daf/butler/registries/sql.py @@ -1367,8 +1367,8 @@ def store_datastore_records(self, refs: Mapping[str, DatasetRef]) -> None: bridge.insert([ref]) # store records in opaque tables - assert ref.datastore_records is not None, "Dataset ref must have datastore records" - for table_name, records in ref.datastore_records.items(): + assert ref._datastore_records is not None, "Dataset ref must have datastore records" + for table_name, records in ref._datastore_records.items(): opaque_table = self._managers.opaque.get(table_name) assert opaque_table is not None, f"Unexpected opaque table name {table_name}" opaque_table.insert(*(record.to_record(dataset_id=ref.id) for record in records)) diff --git a/python/lsst/daf/butler/registry/interfaces/_bridge.py b/python/lsst/daf/butler/registry/interfaces/_bridge.py index 8b005b0bbc..ccbc942d6e 100644 --- a/python/lsst/daf/butler/registry/interfaces/_bridge.py +++ b/python/lsst/daf/butler/registry/interfaces/_bridge.py @@ -93,7 +93,7 @@ def datasetType(self) -> DatasetType: raise AttributeError("A FakeDatasetRef can not be associated with a valid DatasetType") @property - def 
datastore_records(self) -> DatasetDatastoreRecords | None: + def _datastore_records(self) -> DatasetDatastoreRecords | None: raise AttributeError("A FakeDatasetRef can not be associated with datastore records") diff --git a/tests/test_datasets.py b/tests/test_datasets.py index d3965c4814..1d638b1e08 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -693,6 +693,12 @@ def testReplace(self) -> None: self.assertEqual(ref5.run, "somerun") self.assertEqual(ref5, ref) + self.assertIsNone(ref5._datastore_records) + ref5 = ref5.replace(datastore_records={}) + self.assertEqual(ref5._datastore_records, {}) + ref5 = ref5.replace(datastore_records=None) + self.assertIsNone(ref5._datastore_records) + def testPickle(self) -> None: ref = DatasetRef(self.datasetType, self.dataId, run="somerun") s = pickle.dumps(ref) @@ -708,8 +714,8 @@ def testJson(self) -> None: s = ref.to_json() ref2 = DatasetRef.from_json(s, universe=self.universe) self.assertEqual(ref2, ref) - self.assertIsNotNone(ref2.datastore_records) - self.assertEqual(ref2.datastore_records, ref.datastore_records) + self.assertIsNotNone(ref2._datastore_records) + self.assertEqual(ref2._datastore_records, ref._datastore_records) def testFileDataset(self) -> None: ref = DatasetRef(self.datasetType, self.dataId, run="somerun")