-
-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enchancing Metadata Support for SOAR #118
Changes from 4 commits
122e964
0e3307b
70c4c32
a7f7102
3d15042
4748035
f5ac3ec
8496fed
f4340d8
074bb5c
f9a789c
cec907d
1873850
76e1900
d920659
d1b4510
6fc6921
f9352f1
52366cc
6b838c3
183c2d8
645aca5
bd8bc31
616f96b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,8 @@ class SOARClient(BaseClient): | |
Client to access the Solar Orbiter Archive (SOAR). | ||
""" | ||
|
||
join_needed = False | ||
|
||
def search(self, *query, **kwargs): # NOQA: ARG002 | ||
query = and_(*query) | ||
queries = walker.create(query) | ||
|
@@ -35,35 +37,132 @@ def search(self, *query, **kwargs): # NOQA: ARG002 | |
qrt.hide_keys = ["Data item ID", "Filename"] | ||
return qrt | ||
|
||
@staticmethod | ||
def construct_join(query, join_needed, data_table, instrument_table): | ||
nabobalis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
Construct the WHERE, FROM, and SELECT parts of the ADQL query. | ||
nabobalis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
Parameters | ||
---------- | ||
query : list[str] | ||
List of query items. | ||
join_needed : bool | ||
Flag indicating whether a join is needed. | ||
data_table : str | ||
Name of the data table. | ||
instrument_table : str | ||
Name of the instrument table. | ||
|
||
Returns | ||
------- | ||
tuple[str, str, str] | ||
WHERE, FROM, and SELECT parts of the query. | ||
""" | ||
final_query = "" | ||
for parameter in query: | ||
prefix = "h1." if not join_needed or not parameter.startswith("Detector") else "h2." | ||
if parameter.startswith("begin_time"): | ||
time_list = parameter.split("+AND+") | ||
begin_start = f"h1.{time_list[0]}" | ||
begin_end = f"h1.{time_list[1]}" | ||
final_query += f"{begin_start}+AND+{begin_end}+AND+" | ||
hayesla marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# As there are no dimensions in STIX, the dimension index need not be included in the query for STIX. | ||
if "stx" not in instrument_table: | ||
final_query += "h2.dimension_index='1'+AND+" | ||
nabobalis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else: | ||
final_query += f"{prefix}{parameter}+AND+" | ||
|
||
where_part = final_query[:-5] | ||
from_part = f"{data_table} AS h1" | ||
select_part = ( | ||
"h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, " | ||
"h1.data_item_id, h1.filesize, h1.filename, h1.soop_name" | ||
) | ||
if instrument_table: | ||
from_part += f" JOIN {instrument_table} AS h2 USING (data_item_oid)" | ||
select_part += ", h2.detector, h2.dimension_index" | ||
return where_part, from_part, select_part | ||
|
||
@staticmethod | ||
def get_instrument_table(query_item): | ||
""" | ||
Get the instrument table name from the query item. | ||
|
||
For different instruments the table names are: | ||
|
||
- SOLOHI: v_shi_sc_fits | ||
- EUI: v_eui_sc_fits | ||
- STIX: v_stx_sc_fits | ||
- SPICE: v_spi_sc_fits | ||
- PHI: v_phi_sc_fits | ||
- METIS: v_met_sc_fits | ||
|
||
Parameters | ||
---------- | ||
query_item : str | ||
Query item containing the instrument name. | ||
|
||
Returns | ||
------- | ||
str | ||
Instrument table name. | ||
""" | ||
instrument_name = query_item.split("=")[1][1:-1].split("-")[0].upper() | ||
if instrument_name == "STIX": | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think a dict that maps the full name to the "table" name would be "cleaner" python code. |
||
instrument_name = "STX" | ||
elif instrument_name == "SOLOHI": | ||
instrument_name = "SHI" | ||
else: | ||
# For all other remote-sensing instruments, the table alias is derived from | ||
# the first three letters of the instrument name. | ||
instrument_name = instrument_name[:3] | ||
return f"v_{instrument_name.lower()}_sc_fits", instrument_name | ||
|
||
@staticmethod | ||
def _construct_payload(query): | ||
""" | ||
Construct search payload. | ||
|
||
Parameters | ||
---------- | ||
payload : dict[str] | ||
Payload to send to the TAP server. | ||
query : list[str] | ||
List of query items. | ||
|
||
Returns | ||
------- | ||
dict | ||
Payload dictionary to be sent with the query. | ||
""" | ||
# Construct ADQL query | ||
url_query = {} | ||
url_query["SELECT"] = "*" | ||
# Assume science data by default | ||
url_query["FROM"] = "v_sc_data_item" | ||
# Default data table | ||
data_table = "v_sc_data_item" | ||
nabobalis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
instrument_table = None | ||
instrument_name = "" | ||
instrument_found = False | ||
for q in query: | ||
if q.startswith("level") and q.split("=")[1][1:3] == "LL": | ||
# Low latency data | ||
url_query["FROM"] = "v_ll_data_item" | ||
if q.startswith("instrument"): | ||
instrument_table, instrument_name = SOARClient.get_instrument_table(q) | ||
nabobalis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
instrument_found = True | ||
elif q.startswith("descriptor") and not instrument_found: | ||
instrument_table, instrument_name = SOARClient.get_instrument_table(q) | ||
elif q.startswith("level") and q.split("=")[1][1:3] == "LL": | ||
data_table = "v_ll_data_item" | ||
if instrument_table: | ||
instrument_table = instrument_table.replace("_sc_", "_ll_") | ||
# The table aliases employed in the instrument-specific tables are as follows. | ||
if instrument_name in ["EUI", "STX", "MET", "SPI", "PHI", "SHI"]: | ||
SOARClient.join_needed = True | ||
where_part, from_part, select_part = SOARClient.construct_join( | ||
query, SOARClient.join_needed, data_table, instrument_table | ||
) | ||
else: | ||
from_part = data_table | ||
select_part = "*" | ||
where_part = "+AND+".join(query) | ||
|
||
url_query["WHERE"] = "+AND+".join(query) | ||
adql_query = "+".join([f"{item}+{url_query[item]}" for item in url_query]) | ||
adql_query = {"SELECT": select_part, "FROM": from_part, "WHERE": where_part} | ||
|
||
return { | ||
"REQUEST": "doQuery", | ||
"LANG": "ADQL", | ||
"FORMAT": "json", | ||
"QUERY": adql_query, | ||
} | ||
adql_query_str = "+".join([f"{key}+{value}" for key, value in adql_query.items()]) | ||
hayesla marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return {"REQUEST": "doQuery", "LANG": "ADQL", "FORMAT": "json", "QUERY": adql_query_str} | ||
|
||
@staticmethod | ||
def _do_search(query): | ||
|
@@ -92,6 +191,7 @@ def _do_search(query): | |
# Do some list/dict wrangling | ||
names = [m["name"] for m in r.json()["metadata"]] | ||
info = {name: [] for name in names} | ||
|
||
for entry in r.json()["data"]: | ||
for i, name in enumerate(names): | ||
info[name].append(entry[i]) | ||
|
@@ -113,6 +213,8 @@ def _do_search(query): | |
"SOOP Name": info["soop_name"], | ||
}, | ||
) | ||
if SOARClient.join_needed and "detector" in info: | ||
nabobalis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
result_table["Detector"] = info["detector"] | ||
result_table.sort("Start time") | ||
return result_table | ||
|
||
|
@@ -160,7 +262,7 @@ def _can_handle_query(cls, *query): | |
True if this client can handle the given query. | ||
""" | ||
required = {a.Time} | ||
optional = {a.Instrument, a.Level, a.Provider, Product, SOOP} | ||
optional = {a.Instrument, a.Detector, a.Level, a.Provider, Product, SOOP} | ||
if not cls.check_attr_types_in_query(query, required, optional): | ||
return False | ||
# check to make sure the instrument attr passed is one provided by the SOAR. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -150,3 +150,73 @@ def test_when_wrong_provider_passed(): | |
provider = a.Provider.noaa | ||
res = Fido.search(time & instrument & provider) | ||
assert len(res) == 0 | ||
|
||
|
||
def test_search_detector_Instrument_dimension_0(): | ||
nabobalis marked this conversation as resolved.
Show resolved
Hide resolved
NucleonGodX marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# As STIX data has zero dimensions, there is no detector information available in the SOAR data. | ||
nabobalis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
instrument = a.Instrument("STIX") | ||
time = a.Time("2023-03-03 00:00", "2023-03-03 23:59") | ||
hayesla marked this conversation as resolved.
Show resolved
Hide resolved
|
||
level = a.Level(1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also a test for LL data? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually there is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep, will reverse this change! |
||
res = Fido.search(instrument & time & level) | ||
assert "Detector" in res[0].columns | ||
assert res.file_num == 35 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Testing the number of results is maybe not very robust, this number might change in SOAR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just followed how tests are already written in test cases. But I do understand your point! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can check for at least 35 files? |
||
|
||
|
||
def test_search_detector_Instrument_dimension_2(): | ||
NucleonGodX marked this conversation as resolved.
Show resolved
Hide resolved
|
||
instrument = a.Instrument("EUI") | ||
time = a.Time("2020-03-03 15:00", "2020-03-03 16:00") | ||
level = a.Level(1) | ||
detector = a.Detector("HRI_EUV") | ||
res = Fido.search(instrument & time & level & detector) | ||
assert "Detector" in res[0].columns | ||
assert res.file_num == 20 | ||
|
||
|
||
def test_search_detector_Instrument_dimension_4(): | ||
NucleonGodX marked this conversation as resolved.
Show resolved
Hide resolved
|
||
instrument = a.Instrument("SPICE") | ||
time = a.Time("2023-03-03 15:00", "2023-03-03 16:00") | ||
level = a.Level(1) | ||
detector = a.Detector("SW") | ||
res = Fido.search(instrument & time & level & detector) | ||
assert "Detector" in res[0].columns | ||
assert res.file_num == 11 | ||
|
||
|
||
def test_join_science_query(): | ||
result = SOARClient._construct_payload( # NOQA: SLF001 | ||
[ | ||
"instrument='EUI'", | ||
"begin_time>='2021-02-01+00:00:00'+AND+begin_time<='2021-02-02+00:00:00'", | ||
"level='L1'", | ||
"descriptor='eui-fsi174-image'", | ||
] | ||
) | ||
|
||
# Formatted assert statement | ||
assert result["QUERY"] == ( | ||
"SELECT+h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, " | ||
"h1.data_item_id, h1.filesize, h1.filename, h1.soop_name, h2.detector, " | ||
"h2.dimension_index+FROM+v_sc_data_item AS h1 JOIN v_eui_sc_fits AS h2 USING (data_item_oid)" | ||
"+WHERE+h1.instrument='EUI'+AND+h1.begin_time>='2021-02-01+00:00:00'+AND+h1.begin_time<='2021-02-02+00:00:00'" | ||
"+AND+h2.dimension_index='1'+AND+h1.level='L1'+AND+h1.descriptor='eui-fsi174-image'" | ||
) | ||
|
||
|
||
def test_join_low_latency_query(): | ||
result = SOARClient._construct_payload( # NOQA: SLF001 | ||
[ | ||
"instrument='EUI'", | ||
"begin_time>='2021-02-01+00:00:00'+AND+begin_time<='2021-02-02+00:00:00'", | ||
"level='LL01'", | ||
"descriptor='eui-fsi174-image'", | ||
] | ||
) | ||
|
||
# Formatted assert statement | ||
assert result["QUERY"] == ( | ||
"SELECT+h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, " | ||
"h1.data_item_id, h1.filesize, h1.filename, h1.soop_name, h2.detector, " | ||
"h2.dimension_index+FROM+v_ll_data_item AS h1 JOIN v_eui_ll_fits AS h2 USING (data_item_oid)" | ||
"+WHERE+h1.instrument='EUI'+AND+h1.begin_time>='2021-02-01+00:00:00'+AND+h1.begin_time<='2021-02-02+00:00:00'" | ||
"+AND+h2.dimension_index='1'+AND+h1.level='LL01'+AND+h1.descriptor='eui-fsi174-image'" | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not sure I like the idea of this being part of the client, can this not be moved else where?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I could achieve a solution by making a basic function for it, let me know if its better!