Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enchancing Metadata Support for SOAR #118

Merged
merged 24 commits into from
Jul 6, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
122e964
detector added
NucleonGodX May 27, 2024
0e3307b
test fixes/minor fixes
NucleonGodX May 28, 2024
70c4c32
added tests for detector and joins / combined join operations in a si…
NucleonGodX May 29, 2024
a7f7102
detector tests added, useful comments added to contruct methods
NucleonGodX May 30, 2024
3d15042
suggestions applied
NucleonGodX May 31, 2024
4748035
cleaned the code, all suggestions applied
NucleonGodX May 31, 2024
f5ac3ec
comment added for join condition
NucleonGodX Jun 1, 2024
8496fed
pre-commit fix
NucleonGodX Jun 1, 2024
f4340d8
code cleanup
NucleonGodX Jun 2, 2024
074bb5c
code cleanup
NucleonGodX Jun 2, 2024
f9a789c
wavelength added with testcases
NucleonGodX Jun 10, 2024
cec907d
updated changelog
NucleonGodX Jun 13, 2024
1873850
test case updated
NucleonGodX Jun 14, 2024
76e1900
Apply suggestions from code review
NucleonGodX Jun 20, 2024
d920659
suggestions applied
NucleonGodX Jun 20, 2024
d1b4510
Merge branch 'main' into gsoc24-soar
NucleonGodX Jun 20, 2024
6fc6921
test cases simplified
NucleonGodX Jun 22, 2024
f9352f1
Merge branch 'main' into gsoc24-soar
NucleonGodX Jun 27, 2024
52366cc
Update sunpy_soar/client.py
NucleonGodX Jun 27, 2024
6b838c3
tests updated
NucleonGodX Jun 27, 2024
183c2d8
phi and it's tests removed
NucleonGodX Jul 4, 2024
645aca5
removed low_latency detector test
NucleonGodX Jul 4, 2024
bd8bc31
code cleanup, h2. prefix removed from attrs file
NucleonGodX Jul 6, 2024
616f96b
Merge branch 'main' into gsoc24-soar
NucleonGodX Jul 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions sunpy_soar/attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,8 @@ def _(wlk, attr, params): # NOQA: ARG001
@walker.add_applier(SOOP)
def _(wlk, attr, params): # NOQA: ARG001
params.append(f"soop_name='{attr.value}'")


@walker.add_applier(a.Detector)
def _(wlk, attr, params): # NOQA: ARG001
params.append(f"Detector='{attr.value}'")
110 changes: 91 additions & 19 deletions sunpy_soar/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,35 +35,104 @@ def search(self, *query, **kwargs): # NOQA: ARG002
qrt.hide_keys = ["Data item ID", "Filename"]
return qrt

def construct_join(query: list[str], data_table: str, instrument_table: str):
"""
Construct the WHERE, FROM, and SELECT parts of the ADQL query.
nabobalis marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
query : list[str]
List of query items.
data_table : str
Name of the data table.
instrument_table : str
Name of the instrument table.

Returns
-------
tuple[str, str, str]
WHERE, FROM, and SELECT parts of the query.
"""
final_query = ""
for parameter in query:
prefix = "h1." if not parameter.startswith("Detector") else "h2."
if parameter.startswith("begin_time"):
time_list = parameter.split("+AND+")
begin_start = f"h1.{time_list[0]}"
begin_end = f"h1.{time_list[1]}"
final_query += f"{begin_start}+AND+{begin_end}+AND+"
hayesla marked this conversation as resolved.
Show resolved Hide resolved
# As there are no dimensions in STIX, the dimension index need not be included in the query for STIX.
if "stx" not in instrument_table:
# To avoid duplicate rows in the output table, the dimension index is set to 1.
final_query += "h2.dimension_index='1'+AND+"
nabobalis marked this conversation as resolved.
Show resolved Hide resolved
else:
final_query += f"{prefix}{parameter}+AND+"

where_part = final_query[:-5]
from_part = f"{data_table} AS h1"
select_part = (
"h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, "
"h1.data_item_id, h1.filesize, h1.filename, h1.soop_name"
)
if instrument_table:
from_part += f" JOIN {instrument_table} AS h2 USING (data_item_oid)"
select_part += ", h2.detector, h2.dimension_index"
return where_part, from_part, select_part

@staticmethod
def _construct_payload(query):
"""
Construct search payload.

Parameters
----------
payload : dict[str]
Payload to send to the TAP server.
query : list[str]
List of query items.

Returns
-------
dict
Payload dictionary to be sent with the query.
"""
# Construct ADQL query
url_query = {}
url_query["SELECT"] = "*"
# Assume science data by default
url_query["FROM"] = "v_sc_data_item"
# Default data table
data_table = "v_sc_data_item"
nabobalis marked this conversation as resolved.
Show resolved Hide resolved
instrument_table = None
# Mapping is established between the SOAR instrument names and its corresponding SOAR instrument table alias.
instrument_mapping = {
"SOLOHI": "SHI",
"EUI": "EUI",
"STIX": "STX",
"SPICE": "SPI",
"PHI": "PHI",
"METIS": "MET",
}

instrument_name = None
for q in query:
if q.startswith("level") and q.split("=")[1][1:3] == "LL":
# Low latency data
url_query["FROM"] = "v_ll_data_item"
if q.startswith("instrument") or q.startswith("descriptor") and not instrument_name:
instrument_name = q.split("=")[1][1:-1].split("-")[0].upper()
elif q.startswith("level") and q.split("=")[1][1:3] == "LL":
data_table = "v_ll_data_item"

url_query["WHERE"] = "+AND+".join(query)
adql_query = "+".join([f"{item}+{url_query[item]}" for item in url_query])
if instrument_name:
if instrument_name in instrument_mapping:
instrument_name = instrument_mapping[instrument_name]
instrument_table = f"v_{instrument_name.lower()}_sc_fits"
if data_table == "v_ll_data_item" and instrument_table:
instrument_table = instrument_table.replace("_sc_", "_ll_")

return {
"REQUEST": "doQuery",
"LANG": "ADQL",
"FORMAT": "json",
"QUERY": adql_query,
}
# Need to establish join for remote sensing instruments as they have instrument tables in SOAR.
if instrument_name in ["EUI", "STX", "MET", "SPI", "PHI", "SHI"]:
nabobalis marked this conversation as resolved.
Show resolved Hide resolved
NucleonGodX marked this conversation as resolved.
Show resolved Hide resolved
where_part, from_part, select_part = SOARClient.construct_join(query, data_table, instrument_table)
else:
from_part = data_table
select_part = "*"
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need * here, as we select not all columns when a join is needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for instruments where we don't need the join.

where_part = "+AND+".join(query)

adql_query = {"SELECT": select_part, "FROM": from_part, "WHERE": where_part}

adql_query_str = "+".join([f"{key}+{value}" for key, value in adql_query.items()])
hayesla marked this conversation as resolved.
Show resolved Hide resolved
return {"REQUEST": "doQuery", "LANG": "ADQL", "FORMAT": "json", "QUERY": adql_query_str}

@staticmethod
def _do_search(query):
Expand Down Expand Up @@ -92,6 +161,7 @@ def _do_search(query):
# Do some list/dict wrangling
names = [m["name"] for m in r.json()["metadata"]]
info = {name: [] for name in names}

for entry in r.json()["data"]:
for i, name in enumerate(names):
info[name].append(entry[i])
Expand All @@ -113,6 +183,8 @@ def _do_search(query):
"SOOP Name": info["soop_name"],
},
)
if "detector" in info:
result_table["Detector"] = info["detector"]
result_table.sort("Start time")
return result_table

Expand Down Expand Up @@ -160,7 +232,7 @@ def _can_handle_query(cls, *query):
True if this client can handle the given query.
"""
required = {a.Time}
optional = {a.Instrument, a.Level, a.Provider, Product, SOOP}
optional = {a.Instrument, a.Detector, a.Level, a.Provider, Product, SOOP}
if not cls.check_attr_types_in_query(query, required, optional):
return False
# check to make sure the instrument attr passed is one provided by the SOAR.
Expand Down
76 changes: 76 additions & 0 deletions sunpy_soar/tests/test_sunpy_soar.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,79 @@ def test_when_wrong_provider_passed():
provider = a.Provider.noaa
res = Fido.search(time & instrument & provider)
assert len(res) == 0


def test_search_detector_Instrument_dimension_0():
nabobalis marked this conversation as resolved.
Show resolved Hide resolved
NucleonGodX marked this conversation as resolved.
Show resolved Hide resolved
# Since STIX data has no dimensions, there is no detector information available in the SOAR data.
# Therefore, the search results returned by the SOARClient will contain "None" as the value for the
# "Detector" column, indicating that no detector information is available for the given data.
instrument = a.Instrument("STIX")
time = a.Time("2023-03-03 00:00", "2023-03-03 23:59")
hayesla marked this conversation as resolved.
Show resolved Hide resolved
level = a.Level(1)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also a test for LL data?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually there is test_search_low_latency() (for MAG data; executes the query), and the new test_join_low_latency_query() (for EUI; only tests the query string, without executing it). So maybe this is enough?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, will reverse this change!

res = Fido.search(instrument & time & level)
assert "Detector" in res[0].columns
assert res.file_num == 35
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Testing the number of results is maybe not very robust, this number might change in SOAR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just followed how tests are already written in test cases. But I do understand your point!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can check for at least 35 files?



def test_search_detector_Instrument_dimension_2():
NucleonGodX marked this conversation as resolved.
Show resolved Hide resolved
# Instruments "EUI","METIS","PHI" and "SOLOHI" have two dimensions in the SOAR data.
# Selecting no dimension index in the query results in two identical output rows.
# To avoid repeating data, we have methods to take dimension index=1, which avoids any repetition.
instrument = a.Instrument("EUI")
time = a.Time("2020-03-03 15:00", "2020-03-03 16:00")
level = a.Level(1)
detector = a.Detector("HRI_EUV")
res = Fido.search(instrument & time & level & detector)
assert "Detector" in res[0].columns
assert res.file_num == 20


def test_search_detector_Instrument_dimension_4():
NucleonGodX marked this conversation as resolved.
Show resolved Hide resolved
# The "SPICE" instrument has four dimensions in the SOAR data. As a result,
# selecting no dimension index in the query results in four identical output rows.
# To avoid repeating data, we have methods to take dimension index=1, which avoids any repetition.
instrument = a.Instrument("SPICE")
time = a.Time("2023-03-03 15:00", "2023-03-03 16:00")
level = a.Level(1)
detector = a.Detector("SW")
res = Fido.search(instrument & time & level & detector)
assert "Detector" in res[0].columns
assert res.file_num == 11


def test_join_science_query():
result = SOARClient._construct_payload( # NOQA: SLF001
[
"instrument='EUI'",
"begin_time>='2021-02-01+00:00:00'+AND+begin_time<='2021-02-02+00:00:00'",
"level='L1'",
"descriptor='eui-fsi174-image'",
]
)

assert result["QUERY"] == (
"SELECT+h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, "
"h1.data_item_id, h1.filesize, h1.filename, h1.soop_name, h2.detector, "
"h2.dimension_index+FROM+v_sc_data_item AS h1 JOIN v_eui_sc_fits AS h2 USING (data_item_oid)"
"+WHERE+h1.instrument='EUI'+AND+h1.begin_time>='2021-02-01+00:00:00'+AND+h1.begin_time<='2021-02-02+00:00:00'"
"+AND+h2.dimension_index='1'+AND+h1.level='L1'+AND+h1.descriptor='eui-fsi174-image'"
)


def test_join_low_latency_query():
result = SOARClient._construct_payload( # NOQA: SLF001
[
"instrument='EUI'",
"begin_time>='2021-02-01+00:00:00'+AND+begin_time<='2021-02-02+00:00:00'",
"level='LL01'",
"descriptor='eui-fsi174-image'",
]
)

assert result["QUERY"] == (
"SELECT+h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, "
"h1.data_item_id, h1.filesize, h1.filename, h1.soop_name, h2.detector, "
"h2.dimension_index+FROM+v_ll_data_item AS h1 JOIN v_eui_ll_fits AS h2 USING (data_item_oid)"
"+WHERE+h1.instrument='EUI'+AND+h1.begin_time>='2021-02-01+00:00:00'+AND+h1.begin_time<='2021-02-02+00:00:00'"
"+AND+h2.dimension_index='1'+AND+h1.level='LL01'+AND+h1.descriptor='eui-fsi174-image'"
)