From 7a9438229d6e7635b645d7b33bd0883710252699 Mon Sep 17 00:00:00 2001 From: Jaffe Date: Wed, 4 Mar 2020 16:16:04 -0500 Subject: [PATCH] Added .examples to TAPService; updated discovery notebook --- CHANGES.rst | 2 + .../notebooks/simple_service_discovery.ipynb | 471 ++++++++++-------- pyvo/dal/tap.py | 30 ++ pyvo/dal/tests/data/tap/examples.htm | 1 + pyvo/dal/tests/setup_package.py | 1 + pyvo/dal/tests/test_tap.py | 21 + 6 files changed, 327 insertions(+), 199 deletions(-) create mode 100644 pyvo/dal/tests/data/tap/examples.htm diff --git a/CHANGES.rst b/CHANGES.rst index 7d93e2a18..6d2822097 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,8 @@ 1.1 (unreleased) ================ +- Added TAP examples function. [#220] + - Add default for UWS version. [#199] - Handle description of None when describing a TAP service's tables. [#197] diff --git a/examples/notebooks/simple_service_discovery.ipynb b/examples/notebooks/simple_service_discovery.ipynb index d9c806ed9..83190afd3 100644 --- a/examples/notebooks/simple_service_discovery.ipynb +++ b/examples/notebooks/simple_service_discovery.ipynb @@ -1,202 +1,275 @@ { + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pyvo as vo" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The most common use of the registry is to find archives with \n", + "`VO data access services`, and the simplest way\n", + "to do this is to use the \n", + "`~pyvo.regsearch` function. For example, to find data\n", + "collections that contain searchable X-ray images:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "services = vo.regsearch(servicetype='sia', waveband='x-ray')\n", + "len(services)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Aha! Perhaps you didn't realize there were that many.\n", + "What collections are these?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for service in services:\n", + " print(service.res_title)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As you can gather, each record in the registry search results\n", + "represents a different service (in this case, an image service).\n", + "Included in the record is the all-important base URL for the service:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(services[0].access_url)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, it's not necessary to keep track of that URL because you can\n", + "now search that collection directly via the registry record:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "images = services[0].search(pos=(350.85, 58.815), size=0.25)\n", + "len(images)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Other types of services via the ``servicetype`` parameter:\n", + "\n", + "| set ``servicetype`` to... | ...to find:\n", + "| ------------------------- | -------------------------------------\n", + "| sia | Simple Image Access (SIA) services\n", + "| ssa | Simple Spectral Access (SSA) services\n", + "| scs | Simple Cone Search (SCS) services\n", + "| slap | Simple Line Access (SLA) services\n", + "| tap | Table Access Protocol (TAP) services\n", + "\n", + "For example, to find all known Cone Search services:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cats = vo.regsearch(servicetype='conesearch')\n", + "len(cats)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Wow, that's a lot of catalogs. 
(Most of these are from the\n", + "`Vizier Catalog Archive `)\n", + "every Vizier catalog that includes a position is available as a Cone\n", + "Search service.) For just catalogs related to blazars:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cats = vo.regsearch(keywords=['blazar'], servicetype='conesearch')\n", + "len(cats)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How about blazars observed with Fermi?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cats = vo.regsearch(keywords=['blazar', 'Fermi'], servicetype='conesearch')\n", + "len(cats)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sometimes you may be looking for a particular catalog or image collections\n", + "that you already know exists, and you just need to learn the base URL\n", + "for the service. The ``keywords`` parameter can be used to find it.\n", + "For example, suppose you want to get cutout images from the NRAO VLA\n", + "Sky Survey (NVSS):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "colls = vo.regsearch(keywords=[\"NVSS\"], servicetype='sia')\n", + "for coll in colls:\n", + " print(coll.res_title)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "More powerful than a cone search is a generic TAP service. These services optionally provide helpful information on how to use them. 
First, let's see what services are available that serve NASA data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "taps = vo.regsearch(servicetype='tap',keywords=['HEASARC'])\n", + "for tapsvc in taps:\n", + " tapsvc.describe()\n", + " print(\"---------\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then we need to find out what tables each service serves, because each *may* serve many. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tables=taps[0].service.tables\n", + "for table in tables:\n", + " table.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can see example queries if the service offers them. The .examples property returns a list of TAPQuery objects, each of which can be executed. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for example in taps[0].service.examples:\n", + " print(example['QUERY'])\n", + " result=example.execute()\n", + " break\n", + "result.to_table()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And for each table, you can find out the available columns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Columns={}\".format(sorted([k.name for k in tables['a1'].columns ])))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], "metadata": { - "name": "", - "signature": "sha256:e5bd7bbcbb2f77cb4950888f893c03f6a26e81270b3d9c3d8e35af9c004f431e" - }, - "nbformat": 3, - "nbformat_minor": 0, - "worksheets": [ - { - "cells": [ - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import pyvo as vo" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - 
"metadata": {}, - "source": [ - "The most common use of the registry is to find archives with \n", - "`VO data access services`, and the simplest way\n", - "to do this is to use the \n", - "`~pyvo.regsearch` function. For example, to find data\n", - "collections that contain searchable X-ray images:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "services = vo.regsearch(servicetype='sia', waveband='x-ray')\n", - "len(services)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Aha! Perhaps you didn't realize there were that many.\n", - "What collections are these?" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "for service in services:\n", - " print(service.res_title)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As you can gather, each record in the registry search results\n", - "represents a different service (in this case, an image service).\n", - "Included in the record is the all-important base URL for the service:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "print(services[0].access_url)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "However, it's not necessary to keep track of that URL because you can\n", - "now search that collection directly via the registry record:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "images = services[0].search(pos=(350.85, 58.815), size=0.25)\n", - "len(images)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Other types of services via the ``servicetype`` parameter:\n", - "\n", - "| set ``servicetype`` to... 
| ...to find:\n", - "| ------------------------- | -------------------------------------\n", - "| sia | Simple Image Access (SIA) services\n", - "| ssa | Simple Spectral Access (SSA) services\n", - "| scs | Simple Cone Search (SCS) services\n", - "| slap | Simple Line Access (SLA) services\n", - "| tap | Table Access Protocol (TAP) services\n", - "\n", - "For example, to find all known Cone Search services:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "cats = vo.regsearch(servicetype='conesearch')\n", - "len(cats)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Wow, that's a lot of catalogs. (Most of these are from the\n", - "`Vizier Catalog Archive `)\n", - "every Vizier catalog that includes a position is available as a Cone\n", - "Search service.) For just catalogs related to blazars:" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "cats = vo.regsearch(keywords=['blazar'], servicetype='conesearch')\n", - "len(cats)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "How about blazars observed with Fermi?" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "cats = vo.regsearch(keywords=['blazar', 'Fermi'], servicetype='conesearch')\n", - "len(cats)" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Sometimes you may be looking for a particular catalog or image collections\n", - "that you already know exists, and you just need to learn the base URL\n", - "for the service. 
The ``keywords`` parameter can be used to find it.\n", - "For example, suppose you want to get cutout images from the NRAO VLA\n", - "Sky Survey (NVSS):" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "colls = vo.regsearch(keywords=[\"NVSS\"], servicetype='sia')\n", - "for coll in colls:\n", - " print coll.res_title" - ], - "language": "python", - "metadata": {}, - "outputs": [] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [], - "language": "python", - "metadata": {}, - "outputs": [] - } - ], - "metadata": {} + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" } - ] -} \ No newline at end of file + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/pyvo/dal/tap.py b/pyvo/dal/tap.py index e446fe8f0..3fe578607 100644 --- a/pyvo/dal/tap.py +++ b/pyvo/dal/tap.py @@ -23,6 +23,8 @@ from ..utils.formatting import para_format_desc from ..utils.http import use_session +import xml.etree.ElementTree +import io __all__ = [ "search", "escape", "TAPService", "TAPQuery", "AsyncTAPJob", "TAPResults"] @@ -95,6 +97,7 @@ class TAPService(DALService, AvailabilityMixin, CapabilityMixin): """ _tables = None + _examples = None def __init__(self, baseurl, session=None): """ @@ -138,6 +141,33 @@ def tables(self): vosi.parse_tables(response.raw.read), tables_url) return self._tables + @property + def examples(self): + """ + returns examples as a list of TAPQuery objects + """ + if self._examples is None: + examples_url = '{}/examples'.format(self.baseurl) + + response = self._session.get(examples_url, stream=True) + if response.status_code == 404: + return [] + + try: + response.raise_for_status() + except requests.RequestException as ex: + raise 
DALServiceError.from_except(ex, examples_url) + + try: + root = xml.etree.ElementTree.parse(io.BytesIO(response.content)).getroot() + exampleElements = root.findall('.//*[@property="query"]') + except Exception as ex: + raise DALServiceError.from_except(ex, examples_url) + + self._examples = [TAPQuery(self.baseurl, example.text) for example in exampleElements] + + return self._examples + @property def maxrec(self): """ diff --git a/pyvo/dal/tests/data/tap/examples.htm b/pyvo/dal/tests/data/tap/examples.htm new file mode 100644 index 000000000..c4f85972a --- /dev/null +++ b/pyvo/dal/tests/data/tap/examples.htm @@ -0,0 +1 @@ + Examples queries for HEASARC's TAP service

Examples for the TAP service at HEASARC

Simple geometric query on rosmaster with circle and point

The Table Access Protocol Service at HEASARC allows for simple geometric queries. For example, this query searches for observations in the rosmaster catalog within a circle of radius 1 degree around the coordinates (ra,dec)=(50,-85) -- i.e., basically a cone search -- with an exposure longer than 10000 seconds:

            SELECT * FROM rosmaster                      WHERE exposure > 10000 and                            1=CONTAINS(POINT('ICRS', ra, dec),CIRCLE('ICRS', 50, -85, 1))          

Simple cross-match query on rosmaster and chanmaster with inner join

The Table Access Protocol Service at HEASARC allows for simple geometric and cross-match queries. For example, this query searches for observations common to the rosmaster catalog AND the chanmaster catalog with an exposure longer than 10 ks in both.

	   SELECT * FROM rosmaster as ros	            INNER JOIN chanmaster as chan	                ON ros.name = chan.name	            WHERE ros.exposure > 10000 and chan.exposure > 10000	            ORDER by ros.exposure	             

Simple geometric query on rosmaster with intersects, circle, point

The Table Access Protocol Service at HEASARC allows for simple geometric queries. For example, this (slow) query searches for observations in the rosmaster catalog where a circle of radius 1 degree around the coordinates (ra,dec)=(50,-85) intersects with a circle of 1 degree radius around the center of the pointing:

            SELECT * FROM rosmaster                      WHERE 1=INTERSECTS(CIRCLE('ICRS', ra, dec,1),CIRCLE('ICRS', 50, -85, 1))          

Simple geometric query on rosmaster with polygon

The Table Access Protocol Service at HEASARC allows for simple geometric queries. For example, this query searches for observations in the rosmaster catalog where a pointing lies within a user-defined polygon, in this case a triangle with vertices (-5,-5), (5,-5), and (0,5):

            SELECT * FROM rosmaster                      WHERE  exposure > 10000 AND                             1=CONTAINS(POINT('ICRS', ra, dec),POLYGON('ICRS', -5, -5, 5, -5, 0, 5))          

Simple geometric query on rosmaster with ordering by distance

The Table Access Protocol Service at HEASARC allows for simple geometric queries. For example, this query computes the distance between rosmaster observations and a given point, selecting those observations within 1 degree of it, and ordering the result by that distance:

          SELECT DISTANCE(	      POINT('ICRS', ra, dec),              POINT('ICRS', 266.41683, -29.00781)) AS dist, *	  FROM rosmaster	  WHERE 1=CONTAINS(	      POINT('ICRS', ra, dec),	      CIRCLE('ICRS', 266.41683, -29.00781, 1))	  ORDER BY dist ASC          
\ No newline at end of file diff --git a/pyvo/dal/tests/setup_package.py b/pyvo/dal/tests/setup_package.py index 903ea1950..6a5c01634 100644 --- a/pyvo/dal/tests/setup_package.py +++ b/pyvo/dal/tests/setup_package.py @@ -9,6 +9,7 @@ def get_package_data(): os.path.join('data/querydata', '*.xml'), os.path.join('data/querydata', '*.xml'), os.path.join('data/tap', '*.xml'), + os.path.join('data/tap', '*.htm'), os.path.join('data/scs', '*.xml'), os.path.join('data/sia', '*.xml'), os.path.join('data/sia2', '*.xml'), diff --git a/pyvo/dal/tests/test_tap.py b/pyvo/dal/tests/test_tap.py index f9f51aed6..d976abf60 100644 --- a/pyvo/dal/tests/test_tap.py +++ b/pyvo/dal/tests/test_tap.py @@ -298,6 +298,17 @@ def callback_table2(request, context): yield matchers +@pytest.fixture() +def examples(mocker): + def callback_examplesXHTML(request, context): + return get_pkg_data_contents('data/tap/examples.htm') + + with mocker.register_uri( + 'GET', 'http://example.com/tap/examples', content=callback_examplesXHTML + ) as matcher: + yield matcher + + @pytest.fixture() def capabilities(mocker): def callback(request, context): @@ -349,6 +360,16 @@ def test_tables(self): table1, table2 = list(tables) self._test_tables(table1, table2) + def _test_examples(self, exampleXHTML): + assert "SELECT * FROM rosmaster" in exampleXHTML[0]['QUERY'] + + @pytest.mark.usefixtures('examples') + def test_examples(self): + service = TAPService('http://example.com/tap') + examples = service.examples + + self._test_examples(examples) + @pytest.mark.usefixtures('capabilities') def test_maxrec(self): service = TAPService('http://example.com/tap')