Skip to content

Commit

Permalink
Adding an .original_row attribute to DatalinkResults.
Browse files Browse the repository at this point in the history
It also does some minor improvements to the documentation of how to use
datalink.  But that part could really use a lot more love...
  • Loading branch information
msdemlei committed Jul 2, 2024
1 parent 5b2cc53 commit f7b0f79
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 24 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ Enhancements and Fixes
- RegTAP constraints involving tables other than rr.resource are now
done via subqueries for less duplication of interfaces. [#562]

- Where datalink records are made from table rows, the table row is
now accessible as datalinks.original_row. []


Deprecations and Removals
-------------------------
Expand Down
34 changes: 21 additions & 13 deletions docs/dal/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -796,17 +796,22 @@ as quantities):
>>> astropy_table = resultset.to_table()
>>> astropy_qtable = resultset.to_qtable()

Multiple datasets
-----------------
PyVO supports multiple datasets exposed on record level through the datalink.
To get an iterator yielding specific datasets, call
:py:meth:`pyvo.dal.adhoc.DatalinkResults.bysemantics` with the identifier
identifying the dataset you want it to return.
Datalink
--------

.. remove skip once https://github.com/astropy/pyvo/issues/361 is fixed
.. doctest-skip::
Datalink lets operators associate multiple artefacts with a dataset.
Examples include linking raw data, applicable or applied calibration
data, derived datasets such as extracted sources, extra documentation,
and much more.

>>> preview = next(row.getdatalink().bysemantics('#preview')).getdataset()
Datalink can be used both on result rows of queries and on
datalink-valued URLs. The typical use is to call ``iter_datalinks()``
on some DAL result; this will iterate over all datalinks pyVO finds in a
document and yield :py:class:`pyvo.dal.adhoc.DatalinkResults` instances
for them. In those, you can, for instance, pick out items by semantics;
the standard vocabulary that datalink documents use is documented at
http://www.ivoa.net/rdf/datalink/core. Here is how to find URLs for
previews:

.. doctest-remote-data::
>>> rows = vo.dal.TAPService("http://dc.g-vo.org/tap"
Expand Down Expand Up @@ -848,15 +853,18 @@ DatalinkResults using
>>> # In this example you know the URL from somewhere
>>> url = 'https://ws.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/caom2ops/datalink?ID=ivo%3A%2F%2Fcadc.nrc.ca%2FHSTHLA%3Fhst_12477_28_acs_wfc_f606w_01%2Fhst_12477_28_acs_wfc_f606w_01_drz'
>>> datalink = DatalinkResults.from_result_url(url)
>>> next(datalink.bysemantics("#this")).content_type
'application/fits'


Server-side processing
----------------------
Some services support the server-side processing of record datasets.
This includes spatial cutouts for 2d-images, reducing of spectra to a certain
waveband range, and many more depending on the service.

Datalink
^^^^^^^^
Generic Datalink Processing Service
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Generic access to processing services is provided through the datalink
interface.

Expand All @@ -866,8 +874,8 @@ interface.
>>> datalink_proc = next(row.getdatalink().bysemantics('#proc'))

.. note::
most times there is only one processing service per result, and thats all you
need.
Most datalink documents only have one processing service per dataset,
which is why there is the ``get_first_proc`` shortcut mentioned below.


The returned object lets you access the available input parameters which you
Expand Down
49 changes: 39 additions & 10 deletions pyvo/dal/adhoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,10 @@ def iter_datalinks(self):
if batch_size is None:
# first call.
self.query = DatalinkQuery.from_resource(
[_ for _ in self], self._datalink, session=self._session)
[_ for _ in self],
self._datalink,
session=self._session,
original_row=row)
remaining_ids = self.query['ID']
if not remaining_ids:
# we are done
Expand All @@ -217,9 +220,13 @@ def iter_datalinks(self):
id1 = current_ids.pop(0)
processed_ids.append(id1)
remaining_ids.remove(id1)
yield current_batch.clone_byid(id1)
yield current_batch.clone_byid(
id1,
original_row=row)
elif row.access_format == DATALINK_MIME_TYPE:
yield DatalinkResults.from_result_url(row.getdataurl())
yield DatalinkResults.from_result_url(
row.getdataurl(),
original_row=row)
else:
yield None

Expand Down Expand Up @@ -366,6 +373,8 @@ def from_resource(cls, rows, resource, *, session=None, **kwargs):
ref="srcGroup"/>
</GROUP>
"""
original_row = kwargs.pop("original_row", None)

input_params = _get_input_params_from_resource(resource)
# get params outside of any group
dl_params = _get_params_from_resource(resource)
Expand Down Expand Up @@ -402,7 +411,11 @@ def from_resource(cls, rows, resource, *, session=None, **kwargs):
except KeyError:
query_params[name] = query_param

return cls(accessurl, session=session, **query_params)
return cls(
accessurl,
session=session,
original_row=original_row,
**query_params)

def __init__(
self, baseurl, *, id=None, responseformat=None, session=None, **keywords):
Expand All @@ -420,6 +433,8 @@ def __init__(
session : object
optional session to use for network requests
"""
self.original_row = keywords.pop("original_row", None)

super().__init__(baseurl, session=session, **keywords)

if id is not None:
Expand All @@ -441,8 +456,11 @@ def execute(self, post=False):
DALFormatError
for errors parsing the VOTable response
"""
return DatalinkResults(self.execute_votable(post=post),
url=self.queryurl, session=self._session)
return DatalinkResults(
self.execute_votable(post=post),
url=self.queryurl,
original_row=self.original_row,
session=self._session)


class DatalinkResults(DatalinkResultsMixin, DALResults):
Expand Down Expand Up @@ -488,6 +506,10 @@ class DatalinkResults(DatalinkResultsMixin, DALResults):
a Numpy array.
"""

def __init__(self, *args, **kwargs):
    """
    Initialize the results, capturing the optional ``original_row``.

    ``original_row`` (keyword-only, popped from ``kwargs``) is the
    table row this datalink document was generated from, or None
    when that is unknown; it is exposed as the ``original_row``
    attribute.  All remaining arguments are passed on unchanged to
    the superclass constructor.
    """
    self.original_row = kwargs.pop("original_row", None)
    super().__init__(*args, **kwargs)

def getrecord(self, index):
"""
return a representation of a datalink result record that follows
Expand All @@ -503,7 +525,7 @@ def getrecord(self, index):
Returns
-------
REc
Rec
a dictionary-like wrapper containing the result record metadata.
Raises
Expand Down Expand Up @@ -569,10 +591,10 @@ def bysemantics(self, semantics, *, include_narrower=True):
if record.semantics in semantics:
yield record

def clone_byid(self, id):
def clone_byid(self, id, *, original_row=None):
"""
return a clone of the object with results and corresponding
resources matching a given id
resources matching a given id
Returns
-------
Expand All @@ -597,7 +619,7 @@ def clone_byid(self, id):
for x in copy_tb.resources:
if x.ID and x.ID not in referenced_serviced:
copy_tb.resources.remove(x)
return DatalinkResults(copy_tb)
return DatalinkResults(copy_tb, original_row=original_row)

def getdataset(self, *, timeout=None):
"""
Expand Down Expand Up @@ -629,6 +651,13 @@ def get_first_proc(self):
return proc
raise IndexError("No processing service found in datalink result")

@classmethod
def from_result_url(cls, result_url, *, session=None, original_row=None):
    """
    Create a DatalinkResults from a datalink-valued URL.

    This overrides the inherited ``from_result_url`` to accept an
    additional ``original_row`` keyword: the table row the datalink
    document was derived from (or None when there is none).  The
    value is stored on the returned instance as ``original_row``;
    ``result_url`` and ``session`` are handled by the superclass.
    """
    res = super(DatalinkResults, cls).from_result_url(
        result_url, session=session)
    res.original_row = original_row
    return res


class SodaRecordMixin:
"""
Expand Down
8 changes: 7 additions & 1 deletion pyvo/dal/tests/test_datalink.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ def test_datalink():

datalinks = next(results.iter_datalinks())

assert datalinks.original_row["accsize"] == 100800

row = datalinks[0]
assert row.semantics == "#progenitor"

Expand All @@ -132,7 +134,9 @@ def test_datalink_batch():
results = vo.dal.imagesearch(
'http://example.com/obscore', (30, 30))

assert len([_ for _ in results.iter_datalinks()]) == 3
dls = list(results.iter_datalinks())
assert len(dls) == 3
assert dls[0].original_row["obs_collection"] == "MACHO"


@pytest.mark.usefixtures('proc', 'datalink_vocabulary')
Expand All @@ -143,6 +147,8 @@ def test_datalink_batch():
class TestSemanticsRetrieval:
def test_access_with_string(self):
datalinks = DatalinkResults.from_result_url('http://example.com/proc')

assert datalinks.original_row is None
res = [r["access_url"] for r in datalinks.bysemantics("#this")]
assert len(res) == 1
assert res[0].endswith("eq010000ms/20100927.comb_avg.0001.fits.fz")
Expand Down

0 comments on commit f7b0f79

Please sign in to comment.