From f7b0f79cab9948c8c0f2538b9e566ffc0c6a7322 Mon Sep 17 00:00:00 2001 From: Markus Demleitner Date: Thu, 20 Jun 2024 08:58:00 +0200 Subject: [PATCH] Adding an .original_row attribute to DatalinkResults. It also does some minor improvements to the documentation of how to use datalink. But that part could really use a lot more love... --- CHANGES.rst | 3 ++ docs/dal/index.rst | 34 ++++++++++++++--------- pyvo/dal/adhoc.py | 49 ++++++++++++++++++++++++++------- pyvo/dal/tests/test_datalink.py | 8 +++++- 4 files changed, 70 insertions(+), 24 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 1f8c57ceb..aefe677d6 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -37,6 +37,9 @@ Enhancements and Fixes - RegTAP constraints involving tables other than rr.resource are now done via subqueries for less duplication of interfaces. [#562] +- Where datalink records are made from table rows, the table row is + now accessible as datalinks.original_row. [] + Deprecations and Removals ------------------------- diff --git a/docs/dal/index.rst b/docs/dal/index.rst index 84ec4ce0e..3b7b98822 100644 --- a/docs/dal/index.rst +++ b/docs/dal/index.rst @@ -796,17 +796,22 @@ as quantities): >>> astropy_table = resultset.to_table() >>> astropy_qtable = resultset.to_qtable() -Multiple datasets ------------------ -PyVO supports multiple datasets exposed on record level through the datalink. -To get an iterator yielding specific datasets, call -:py:meth:`pyvo.dal.adhoc.DatalinkResults.bysemantics` with the identifier -identifying the dataset you want it to return. +Datalink +-------- -.. remove skip once https://github.com/astropy/pyvo/issues/361 is fixed -.. doctest-skip:: +Datalink lets operators associate multiple artefacts with a dataset. +Examples include linking raw data, applicable or applied calibration +data, derived datasets such as extracted sources, extra documentation, +and much more. 
- >>> preview = next(row.getdatalink().bysemantics('#preview')).getdataset() +Datalink can be used both on result rows of queries and from +datalink-valued URLs. The typical use is to call ``iter_datalinks()`` +on some DAL result; this will iterate over all datalinks pyVO finds in a +document and yield :py:class:`pyvo.dal.adhoc.DatalinkResults` instances +for them. In those, you can, for instance, pick out items by semantics, +where the standard vocabulary datalink documents use is documented at +http://www.ivoa.net/rdf/datalink/core. Here is how to find URLs for +previews: .. doctest-remote-data:: >>> rows = vo.dal.TAPService("http://dc.g-vo.org/tap" @@ -848,6 +853,9 @@ DatalinkResults using >>> # In this example you know the URL from somewhere >>> url = 'https://ws.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/caom2ops/datalink?ID=ivo%3A%2F%2Fcadc.nrc.ca%2FHSTHLA%3Fhst_12477_28_acs_wfc_f606w_01%2Fhst_12477_28_acs_wfc_f606w_01_drz' >>> datalink = DatalinkResults.from_result_url(url) + >>> next(datalink.bysemantics("#this")).content_type + 'application/fits' + Server-side processing ---------------------- @@ -855,8 +863,8 @@ Some services support the server-side processing of record datasets. This includes spatial cutouts for 2d-images, reducing of spectra to a certain waveband range, and many more depending on the service. -Datalink -^^^^^^^^ +Generic Datalink Processing Service +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Generic access to processing services is provided through the datalink interface. @@ -866,8 +874,8 @@ interface. >>> datalink_proc = next(row.getdatalink().bysemantics('#proc')) .. note:: - most times there is only one processing service per result, and thats all you - need. + Most datalink documents only have one processing service per dataset, + which is why there is the ``get_first_proc`` shortcut mentioned below. 
The returned object lets you access the available input parameters which you diff --git a/pyvo/dal/adhoc.py b/pyvo/dal/adhoc.py index ccfab6ad4..8d92e3b78 100644 --- a/pyvo/dal/adhoc.py +++ b/pyvo/dal/adhoc.py @@ -198,7 +198,10 @@ def iter_datalinks(self): if batch_size is None: # first call. self.query = DatalinkQuery.from_resource( - [_ for _ in self], self._datalink, session=self._session) + [_ for _ in self], + self._datalink, + session=self._session, + original_row=row) remaining_ids = self.query['ID'] if not remaining_ids: # we are done @@ -217,9 +220,13 @@ def iter_datalinks(self): id1 = current_ids.pop(0) processed_ids.append(id1) remaining_ids.remove(id1) - yield current_batch.clone_byid(id1) + yield current_batch.clone_byid( + id1, + original_row=row) elif row.access_format == DATALINK_MIME_TYPE: - yield DatalinkResults.from_result_url(row.getdataurl()) + yield DatalinkResults.from_result_url( + row.getdataurl(), + original_row=row) else: yield None @@ -366,6 +373,8 @@ def from_resource(cls, rows, resource, *, session=None, **kwargs): ref="srcGroup"/> """ + original_row = kwargs.pop("original_row", None) + input_params = _get_input_params_from_resource(resource) # get params outside of any group dl_params = _get_params_from_resource(resource) @@ -402,7 +411,11 @@ def from_resource(cls, rows, resource, *, session=None, **kwargs): except KeyError: query_params[name] = query_param - return cls(accessurl, session=session, **query_params) + return cls( + accessurl, + session=session, + original_row=original_row, + **query_params) def __init__( self, baseurl, *, id=None, responseformat=None, session=None, **keywords): @@ -420,6 +433,8 @@ def __init__( session : object optional session to use for network requests """ + self.original_row = keywords.pop("original_row", None) + super().__init__(baseurl, session=session, **keywords) if id is not None: @@ -441,8 +456,11 @@ def execute(self, post=False): DALFormatError for errors parsing the VOTable response """ - 
return DatalinkResults(self.execute_votable(post=post), - url=self.queryurl, session=self._session) + return DatalinkResults( + self.execute_votable(post=post), + url=self.queryurl, + original_row=self.original_row, + session=self._session) class DatalinkResults(DatalinkResultsMixin, DALResults): @@ -488,6 +506,10 @@ class DatalinkResults(DatalinkResultsMixin, DALResults): a Numpy array. """ + def __init__(self, *args, **kwargs): + self.original_row = kwargs.pop("original_row", None) + super().__init__(*args, **kwargs) + def getrecord(self, index): """ return a representation of a datalink result record that follows @@ -503,7 +525,7 @@ def getrecord(self, index): Returns ------- - REc + Rec a dictionary-like wrapper containing the result record metadata. Raises @@ -569,10 +591,10 @@ def bysemantics(self, semantics, *, include_narrower=True): if record.semantics in semantics: yield record - def clone_byid(self, id): + def clone_byid(self, id, *, original_row=None): """ return a clone of the object with results and corresponding - resources matching a given id + resources matching a given id Returns ------- @@ -597,7 +619,7 @@ def clone_byid(self, id): for x in copy_tb.resources: if x.ID and x.ID not in referenced_serviced: copy_tb.resources.remove(x) - return DatalinkResults(copy_tb) + return DatalinkResults(copy_tb, original_row=original_row) def getdataset(self, *, timeout=None): """ @@ -629,6 +651,13 @@ def get_first_proc(self): return proc raise IndexError("No processing service found in datalink result") + @classmethod + def from_result_url(cls, result_url, *, session=None, original_row=None): + res = super(DatalinkResults, cls).from_result_url( + result_url, session=session) + res.original_row = original_row + return res + class SodaRecordMixin: """ diff --git a/pyvo/dal/tests/test_datalink.py b/pyvo/dal/tests/test_datalink.py index 676ff214d..daf77b27a 100644 --- a/pyvo/dal/tests/test_datalink.py +++ b/pyvo/dal/tests/test_datalink.py @@ -110,6 +110,8 @@ def 
test_datalink(): datalinks = next(results.iter_datalinks()) + assert datalinks.original_row["accsize"] == 100800 + row = datalinks[0] assert row.semantics == "#progenitor" @@ -132,7 +134,9 @@ def test_datalink_batch(): results = vo.dal.imagesearch( 'http://example.com/obscore', (30, 30)) - assert len([_ for _ in results.iter_datalinks()]) == 3 + dls = list(results.iter_datalinks()) + assert len(dls) == 3 + assert dls[0].original_row["obs_collection"] == "MACHO" @pytest.mark.usefixtures('proc', 'datalink_vocabulary') @@ -143,6 +147,8 @@ def test_datalink_batch(): class TestSemanticsRetrieval: def test_access_with_string(self): datalinks = DatalinkResults.from_result_url('http://example.com/proc') + + assert datalinks.original_row is None res = [r["access_url"] for r in datalinks.bysemantics("#this")] assert len(res) == 1 assert res[0].endswith("eq010000ms/20100927.comb_avg.0001.fits.fz")