Skip to content

Commit

Permalink
Enhancing Metadata Support for SOAR (#118)
Browse files Browse the repository at this point in the history
* detector added

* test fixes/minor fixes

* added tests for detector and joins / combined join operations in a single function

* detector tests added, useful comments added to construct methods

* suggestions applied

* cleaned the code, all suggestions applied

* comment added for join condition

* pre-commit fix

* code cleanup

* code cleanup

* wavelength added with testcases

* updated changelog

* test case updated

* Apply suggestions from code review

Co-authored-by: Nabil Freij <[email protected]>

* suggestions applied

* test cases simplified

* Update sunpy_soar/client.py

Co-authored-by: Laura Hayes <[email protected]>

* tests updated

* phi and its tests removed

* removed low_latency detector test

* code cleanup, h2. prefix removed from attrs file

---------

Co-authored-by: Nabil Freij <[email protected]>
Co-authored-by: Laura Hayes <[email protected]>
  • Loading branch information
3 people authored Jul 6, 2024
1 parent a8a9b04 commit f92ed86
Show file tree
Hide file tree
Showing 4 changed files with 237 additions and 19 deletions.
1 change: 1 addition & 0 deletions changelog/118.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added support for ``detector`` and ``wavelength`` search attributes and the ability to filter with them.
12 changes: 12 additions & 0 deletions sunpy_soar/attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,15 @@ def _(wlk, attr, params): # NOQA: ARG001
@walker.add_applier(SOOP)
def _(wlk, attr, params):  # NOQA: ARG001
    """Append the ``soop_name`` equality condition for a ``SOOP`` attr to ``params``."""
    soop_condition = f"soop_name='{attr.value}'"
    params.append(soop_condition)


@walker.add_applier(a.Detector)
def _(wlk, attr, params):  # NOQA: ARG001
    """Append the ``Detector`` equality condition for a detector attr to ``params``."""
    detector_condition = f"Detector='{attr.value}'"
    params.append(detector_condition)


@walker.add_applier(a.Wavelength)
def _(wlk, attr, params):  # NOQA: ARG001
    """
    Append a combined ``Wavemin``/``Wavemax`` condition for a wavelength attr.

    Both bounds are written into a single ``+AND+``-joined item so that the
    query builder can later pick them apart again.
    """
    lower, upper = attr.min.value, attr.max.value
    params.append(f"Wavemin='{lower}'+AND+Wavemax='{upper}'")
125 changes: 106 additions & 19 deletions sunpy_soar/client.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import pathlib
import re

import astropy.table
import astropy.units as u
Expand Down Expand Up @@ -35,35 +36,116 @@ def search(self, *query, **kwargs): # NOQA: ARG002
qrt.hide_keys = ["Data item ID", "Filename"]
return qrt

def add_join_to_query(query: list[str], data_table: str, instrument_table: str) -> tuple[str, str, str]:
    """
    Construct the WHERE, FROM, and SELECT parts of the ADQL query.

    The data table is aliased as ``h1`` and, when given, the instrument table
    is joined on ``data_item_oid`` and aliased as ``h2``. Conditions that start
    with ``Detector`` or ``Wave`` are qualified with ``h2.``; every other
    condition is qualified with ``h1.``.

    Parameters
    ----------
    query : list[str]
        List of query items, e.g. ``"instrument='EUI'"`` or a combined
        ``"begin_time>=...+AND+begin_time<=..."`` item.
    data_table : str
        Name of the data table.
    instrument_table : str
        Name of the instrument table. A falsy value (``None`` or ``""``)
        means no join is added and no ``h2.dimension_index`` condition is
        emitted.

    Returns
    -------
    tuple[str, str, str]
        WHERE, FROM, and SELECT parts of the query.
    """
    # Extract wavemin and wavemax individually
    wavemin_pattern = re.compile(r"Wavemin='(\d+\.\d+)'")
    wavemax_pattern = re.compile(r"Wavemax='(\d+\.\d+)'")

    conditions = []
    for parameter in query:
        wavemin_match = wavemin_pattern.search(parameter)
        wavemax_match = wavemax_pattern.search(parameter)
        if wavemin_match and wavemax_match:
            wavemin = wavemin_match.group(1)
            wavemax = wavemax_match.group(1)
            if float(wavemin) == float(wavemax):
                # If the wavemin and wavemax are same that means only one wavelength is given in query.
                # For PHI and SPICE, we can specify wavemin and wavemax in the query and get the results.
                # For PHI we have wavelength data in both angstrom and nanometer without it being
                # mentioned in the SOAR.
                # For SPICE we get data in form of wavemin/wavemax columns, but only for the first
                # spectral window.
                # To make sure this data is not misleading to the user we do not return any values
                # for PHI and SPICE.
                parameter = f"Wavelength='{wavemin}'"
            else:
                # The "h2." prefix for Wavemin is added below; Wavemax needs it spelled out here.
                parameter = f"Wavemin='{wavemin}'+AND+h2.Wavemax='{wavemax}'"

        if parameter.startswith("begin_time"):
            # The time item carries both bounds joined by "+AND+"; qualify each one separately.
            time_list = parameter.split("+AND+")
            conditions.append(f"h1.{time_list[0]}")
            conditions.append(f"h1.{time_list[1]}")
            # As there are no dimensions in STIX, the dimension index need not be included in the query for STIX.
            # Without an instrument table there is no "h2" alias at all, so the condition is skipped as well.
            if instrument_table and "stx" not in instrument_table:
                # To avoid duplicate rows in the output table, the dimension index is set to 1.
                conditions.append("h2.dimension_index='1'")
        else:
            prefix = "h2." if parameter.startswith(("Detector", "Wave")) else "h1."
            conditions.append(f"{prefix}{parameter}")

    where_part = "+AND+".join(conditions)
    from_part = f"{data_table} AS h1"
    select_part = (
        "h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, "
        "h1.data_item_id, h1.filesize, h1.filename, h1.soop_name"
    )
    if instrument_table:
        from_part += f" JOIN {instrument_table} AS h2 USING (data_item_oid)"
        select_part += ", h2.detector, h2.wavelength, h2.dimension_index"
    return where_part, from_part, select_part

@staticmethod
def _construct_payload(query):
    """
    Construct search payload.

    Parameters
    ----------
    query : list[str]
        List of query items, e.g. ``"instrument='EUI'"`` or ``"level='L1'"``.

    Returns
    -------
    dict
        Payload dictionary to be sent with the query. It holds the keys
        ``REQUEST``, ``LANG``, ``FORMAT`` and ``QUERY``, where ``QUERY`` is
        the ``+``-joined ADQL string.
    """
    # Default data table (science data is assumed unless a low latency level is requested).
    data_table = "v_sc_data_item"
    instrument_table = None
    # Mapping is established between the SOAR instrument names and its corresponding SOAR instrument table alias.
    instrument_mapping = {
        "SOLOHI": "SHI",
        "EUI": "EUI",
        "STIX": "STX",
        "SPICE": "SPI",
        "PHI": "PHI",
        "METIS": "MET",
    }

    instrument_name = None
    for q in query:
        # "and" binds tighter than "or": an "instrument" item always sets the name, while a
        # "descriptor" item only does so when no name has been found yet.
        if q.startswith("instrument") or (q.startswith("descriptor") and not instrument_name):
            # Strip the surrounding quotes and keep the part before the first "-",
            # e.g. "descriptor='eui-fsi174-image'" gives "EUI".
            instrument_name = q.split("=")[1][1:-1].split("-")[0].upper()
        elif q.startswith("level") and q.split("=")[1][1:3] == "LL":
            # Low latency data
            data_table = "v_ll_data_item"

    if instrument_name:
        instrument_name = instrument_mapping.get(instrument_name, instrument_name)
        instrument_table = f"v_{instrument_name.lower()}_sc_fits"
        if data_table == "v_ll_data_item":
            instrument_table = instrument_table.replace("_sc_", "_ll_")

    # Need to establish join for remote sensing instruments as they have instrument tables in SOAR.
    if instrument_name in ("EUI", "MET", "SPI", "PHI", "SHI"):
        where_part, from_part, select_part = SOARClient.add_join_to_query(query, data_table, instrument_table)
    else:
        from_part = data_table
        select_part = "*"
        where_part = "+AND+".join(query)

    adql_query = {"SELECT": select_part, "FROM": from_part, "WHERE": where_part}

    adql_query_str = "+".join([f"{key}+{value}" for key, value in adql_query.items()])
    return {"REQUEST": "doQuery", "LANG": "ADQL", "FORMAT": "json", "QUERY": adql_query_str}

@staticmethod
def _do_search(query):
Expand Down Expand Up @@ -92,6 +174,7 @@ def _do_search(query):
# Do some list/dict wrangling
names = [m["name"] for m in r.json()["metadata"]]
info = {name: [] for name in names}

for entry in r.json()["data"]:
for i, name in enumerate(names):
info[name].append(entry[i])
Expand All @@ -113,6 +196,10 @@ def _do_search(query):
"SOOP Name": info["soop_name"],
},
)
if "detector" in info:
result_table["Detector"] = info["detector"]
if "wavelength" in info:
result_table["Wavelength"] = info["wavelength"]
result_table.sort("Start time")
return result_table

Expand Down Expand Up @@ -160,7 +247,7 @@ def _can_handle_query(cls, *query):
True if this client can handle the given query.
"""
required = {a.Time}
optional = {a.Instrument, a.Level, a.Provider, Product, SOOP}
optional = {a.Instrument, a.Detector, a.Wavelength, a.Level, a.Provider, Product, SOOP}
if not cls.check_attr_types_in_query(query, required, optional):
return False
# check to make sure the instrument attr passed is one provided by the SOAR.
Expand Down
118 changes: 118 additions & 0 deletions sunpy_soar/tests/test_sunpy_soar.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,121 @@ def test_when_wrong_provider_passed():
provider = a.Provider.noaa
res = Fido.search(time & instrument & provider)
assert len(res) == 0


def test_search_wavelength_detector_column():
    """An EUI product search exposes both the "Wavelength" and "Detector" columns."""
    search_attrs = (
        a.Instrument("EUI")
        & a.Time("2021-02-01", "2021-02-02")
        & a.Level(1)
        & a.soar.Product("EUI-FSI174-IMAGE")
    )
    columns = Fido.search(search_attrs)[0].columns
    assert "Wavelength" in columns
    assert "Detector" in columns


def test_search_detector_instrument_dimension_2():
    """
    Detector search on an instrument with two dimensions returns no repeated rows.

    Instruments "EUI", "METIS", "PHI" and "SOLOHI" have two dimensions in the SOAR data.
    Selecting no dimension index in the query results in two identical output rows.
    To avoid repeating data, the query is restricted to dimension index 1.
    """
    search_attrs = (
        a.Instrument("EUI")
        & a.Time("2020-03-03", "2020-03-04")
        & a.Level(1)
        & a.Detector("HRI_EUV")
    )
    results = Fido.search(search_attrs)
    assert "Detector" in results[0].columns
    assert results.file_num == 266


def test_search_detector_instrument_dimension_4():
    """
    Detector search on an instrument with four dimensions returns no repeated rows.

    The "SPICE" instrument has four dimensions in the SOAR data. As a result,
    selecting no dimension index in the query results in four identical output rows.
    To avoid repeating data, the query is restricted to dimension index 1.
    """
    search_attrs = (
        a.Instrument("SPICE")
        & a.Time("2023-03-03 15:00", "2023-03-03 16:00")
        & a.Level(1)
        & a.Detector("SW")
    )
    results = Fido.search(search_attrs)
    assert "Detector" in results[0].columns
    assert results.file_num == 11


def test_invalid_detector():
    """An unknown detector name still yields a "Detector" column but no files."""
    search_attrs = (
        a.Instrument("SPICE")
        & a.Time("2023-03-03 15:00", "2023-03-03 16:00")
        & a.Level(1)
        & a.Detector("hello")
    )
    results = Fido.search(search_attrs)
    assert "Detector" in results[0].columns
    assert results.file_num == 0


def test_wavelength_column_wavelength_exists():
    """
    The "Wavelength" column exists in the search results.

    For instruments EUI, METIS and SOLOHI the "wavelength" column is available.
    """
    search_attrs = (
        a.Instrument("EUI")
        & a.Time("2023-04-03 15:00", "2023-04-03 16:00")
        & a.Level(1)
        & a.Wavelength(304 * u.AA)
    )
    results = Fido.search(search_attrs)
    assert "Wavelength" in results[0].columns
    assert results.file_num == 12


def test_wavelength_single():
    """Results are filtered to the wavelength when a single value is provided."""
    search_attrs = (
        a.Instrument("EUI")
        & a.Time("2023-04-03 15:00", "2023-04-03 16:00")
        & a.Level(1)
        & a.Wavelength(304 * u.AA)
    )
    results = Fido.search(search_attrs)
    for result_table in results:
        assert all(result_table["Wavelength"] == 304)


def test_wavelength_range():
    """Results are filtered when both wavemin and wavemax are provided."""
    search_attrs = (
        a.Instrument("EUI")
        & a.Time("2023-04-03 15:00", "2023-04-03 16:00")
        & a.Level(1)
        & a.Wavelength(171 * u.AA, 185 * u.AA)
    )
    results = Fido.search(search_attrs)
    for result_table in results:
        assert all(result_table["Wavelength"] == 174)


def test_join_science_query():
    """A science-level EUI query joins ``v_sc_data_item`` with ``v_eui_sc_fits``."""
    query_items = [
        "instrument='EUI'",
        "begin_time>='2021-02-01+00:00:00'+AND+begin_time<='2021-02-02+00:00:00'",
        "level='L1'",
        "descriptor='eui-fsi174-image'",
    ]
    payload = SOARClient._construct_payload(query_items)  # NOQA: SLF001

    expected_query = (
        "SELECT+h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, "
        "h1.data_item_id, h1.filesize, h1.filename, h1.soop_name, h2.detector, h2.wavelength, "
        "h2.dimension_index+FROM+v_sc_data_item AS h1 JOIN v_eui_sc_fits AS h2 USING (data_item_oid)"
        "+WHERE+h1.instrument='EUI'+AND+h1.begin_time>='2021-02-01+00:00:00'+AND+h1.begin_time<='2021-02-02+00:00:00'"
        "+AND+h2.dimension_index='1'+AND+h1.level='L1'+AND+h1.descriptor='eui-fsi174-image'"
    )
    assert payload["QUERY"] == expected_query


def test_join_low_latency_query():
    """A low-latency EUI query joins ``v_ll_data_item`` with ``v_eui_ll_fits``."""
    query_items = [
        "instrument='EUI'",
        "begin_time>='2021-02-01+00:00:00'+AND+begin_time<='2021-02-02+00:00:00'",
        "level='LL01'",
        "descriptor='eui-fsi174-image'",
    ]
    payload = SOARClient._construct_payload(query_items)  # NOQA: SLF001

    expected_query = (
        "SELECT+h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, "
        "h1.data_item_id, h1.filesize, h1.filename, h1.soop_name, h2.detector, h2.wavelength, "
        "h2.dimension_index+FROM+v_ll_data_item AS h1 JOIN v_eui_ll_fits AS h2 USING (data_item_oid)"
        "+WHERE+h1.instrument='EUI'+AND+h1.begin_time>='2021-02-01+00:00:00'+AND+h1.begin_time<='2021-02-02+00:00:00'"
        "+AND+h2.dimension_index='1'+AND+h1.level='LL01'+AND+h1.descriptor='eui-fsi174-image'"
    )
    assert payload["QUERY"] == expected_query

0 comments on commit f92ed86

Please sign in to comment.