Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AMON Text conversion #15

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions gcn_classic_text_to_json/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,32 @@
invalid_trigger_dates = ["(yy-mm-dd)", "(yy/mm/dd)", "(yyyy/mm/dd)"]


def parse_notice(text):
    """Convert the text of an email body to a dictionary

    Each non-blank line is split on whitespace: the first token, with its
    final character (the trailing ``:``) removed, is the key and the
    remaining tokens, re-joined with single spaces, are the value.  When a
    key occurs on several lines the values are concatenated with newlines.
    Blank or whitespace-only lines are ignored.

    Parameters
    ----------
    text: string
        the email body

    Returns
    --------
    dict
        The dictionary equivalent of the text"""
    output = {}
    for line in text.split("\n"):
        line_data = line.split()
        if not line_data:
            # Nothing to parse on an empty line; indexing the first token
            # here would raise IndexError.
            continue

        # The first token is expected to look like "KEY:", so drop the
        # last character to obtain the bare keyword.
        key = line_data[0][:-1]
        value = " ".join(line_data[1:])
        if key in output:
            output[key] += "\n" + value
        else:
            output[key] = value

    return output


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can try: Python's standard library xml / html parser modules

def parse_trigger_links(link, prefix, regex_string):
"""Returns a list of trigger_links present in `link`.

Expand Down Expand Up @@ -123,14 +149,18 @@ def text_to_json(notice, keywords_dict):
notice_ra = keywords_dict["standard"]["ra"]
ra_data = notice[notice_ra].split()

if ra_data[0] != "Undefined":
if ra_data[0] == "Undefined":
output["ra"] = None
else:
output["ra"] = float(ra_data[0][:-1])

if "dec" in keywords_dict["standard"]:
notice_dec = keywords_dict["standard"]["dec"]
dec_data = notice[notice_dec].split()

if dec_data[0] != "Undefined":
if dec_data[0] == "Undefined":
output["dec"] = None
else:
output["dec"] = float(dec_data[0][:-1])

if "additional" in keywords_dict:
Expand Down
28 changes: 28 additions & 0 deletions gcn_classic_text_to_json/notices/amon/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# AMON Text Conversion

Parses through all webpages with AMON text notices and creates a JSON with GCN schema keywords. Creates an `amon_jsons` directory inside an `output` directory and saves JSONs as `AMON_{serial_number}_{record_number}.json`, where serial_number is a random iterating number with no association to the notices and record_number is the current notice in the webpage.

### Uses the following fields from the core schema for text notice fields
- `id` → EVENT_NUM, RUN_NUM
- `ra` → SRC_RA
- `dec` → SRC_DEC
- `ra_dec_error` → SRC_ERROR
- `alert_datetime` → NOTICE_DATE
- `trigger_time` → DISCOVERY_DATE, DISCOVERY_TIME
- `event_name` → EVENT_NAME
- `record_number` → REVISION
- `far` → FAR

### Defines the following new fields for the text notice fields
- `notice_type` → NOTICE_TYPE
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NOTICE_TYPE is quite common in GCN classic, so it makes sense to add it to the core schema itself.

- `n_events` → N_EVENTS
- `delta_time` → DELTA_T
- `sigma_time` → SIGMA_T
- `false_positive` → FALSE_POS
- `charge` → CHARGE
- `signalness` → SIGNALNESS, SIGNAL_TRACKNESS
- `coincidence_with` → COINC_PAIR

## Caveats
- The notices have a field called STREAM, but this seems to be degenerate with NOTICE_TYPE and so I've not added it to the JSONs
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
- The notices have a field called STREAM, but these seems to be degenrate with NOTICE_TYPE and so I've not added these to the JSONs
- The notices have a field called STREAM, but these seems to be degenerate with NOTICE_TYPE and so I've not added these to the JSONs

- SKYMAP_FITS_URL is a field for the Burst notice, but it is not available for any of the notices. Hence, I've not included it in the JSONs
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Empty file.
4 changes: 4 additions & 0 deletions gcn_classic_text_to_json/notices/amon/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from . import conversion

# Entry point for running the package's `__main__.py`: start the full AMON
# conversion only when executed as a script, not when merely imported.
if __name__ == "__main__":
    conversion.create_all_amon_jsons()
287 changes: 287 additions & 0 deletions gcn_classic_text_to_json/notices/amon/conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,287 @@
import email
import json
import os

import requests

from ... import conversion

# Keyword mapping passed to text_to_json_amon for "Astrotrack" notices:
# GCN schema field name -> text-notice keyword.  Entries under "additional"
# pair the keyword with a type tag -- presumably the type the shared
# converter casts the value to; confirm against conversion.text_to_json.
input_gold_bronze = dict(
    standard=dict(
        alert_datetime="NOTICE_DATE",
        trigger_time=["DISCOVERY_DATE", "DISCOVERY_TIME"],
        ra="SRC_RA",
        dec="SRC_DEC",
    ),
    additional=dict(
        ra_dec_error=("SRC_ERROR", "float"),
        ra_dec_error_50=("SRC_ERROR50", "float"),
        record_number=("REVISION", "int"),
        energy=("ENERGY", "float"),
        signalness=("SIGNALNESS", "float"),
        far=("FAR", "float"),
    ),
)

# Keyword mapping passed to text_to_json_amon for "Burst" notices:
# GCN schema field name -> text-notice keyword (plus a type tag for the
# "additional" entries).  Note the mixed-case keywords "delta_T" and
# "Pvalue" used here.
input_burst = dict(
    standard=dict(
        alert_datetime="NOTICE_DATE",
        trigger_time=["DISCOVERY_DATE", "DISCOVERY_TIME"],
        ra="SRC_RA",
        dec="SRC_DEC",
    ),
    additional=dict(
        ra_dec_error=("SRC_ERROR", "float"),
        record_number=("REVISION", "int"),
        far=("FAR", "float"),
        delta_time=("delta_T", "float"),
        p_value=("Pvalue", "float"),
    ),
)

# Keyword mapping passed to text_to_json_amon for "Neutrino-EM" notices:
# GCN schema field name -> text-notice keyword (plus a type tag for the
# "additional" entries).
input_coincidence = dict(
    standard=dict(
        alert_datetime="NOTICE_DATE",
        trigger_time=["DISCOVERY_DATE", "DISCOVERY_TIME"],
        ra="SRC_RA",
        dec="SRC_DEC",
    ),
    additional=dict(
        ra_dec_error=("SRC_ERROR", "float"),
        ra_dec_error_50=("SRC_ERROR50", "float"),
        record_number=("REVISION", "int"),
        delta_time=("delta_T", "float"),
        far=("FAR", "float"),
        event_date=("EVENT_DATE", "string"),
    ),
)

# Keyword mapping passed to text_to_json_amon for "Cascade" notices:
# GCN schema field name -> text-notice keyword (plus a type tag for the
# "additional" entries).
input_cascade = dict(
    standard=dict(
        alert_datetime="NOTICE_DATE",
        trigger_time=["DISCOVERY_DATE", "DISCOVERY_TIME"],
        ra="SRC_RA",
        dec="SRC_DEC",
    ),
    additional=dict(
        ra_dec_error=("SRC_ERROR", "float"),
        ra_dec_error_50=("SRC_ERROR50", "float"),
        record_number=("REVISION", "int"),
        energy=("ENERGY", "float"),
        signalness=("SIGNALNESS", "float"),
        far=("FAR", "float"),
    ),
)

# Keyword mapping passed to text_to_json_amon for ICECUBE "EHE" notices:
# GCN schema field name -> text-notice keyword (plus a type tag for the
# "additional" entries).
input_ehe = dict(
    standard=dict(
        alert_datetime="NOTICE_DATE",
        trigger_time=["DISCOVERY_DATE", "DISCOVERY_TIME"],
        ra="SRC_RA",
        dec="SRC_DEC",
    ),
    additional=dict(
        ra_dec_error=("SRC_ERROR", "float"),
        record_number=("REVISION", "int"),
        energy=("ENERGY", "float"),
        signalness=("SIGNALNESS", "float"),
        n_events=("N_EVENTS", "int"),
        delta_time=("DELTA_T", "float"),
        sigma_time=("SIGMA_T", "float"),
        charge=("CHARGE", "float"),
    ),
)

# Keyword mapping passed to text_to_json_amon for ICECUBE "HESE" notices:
# GCN schema field name -> text-notice keyword (plus a type tag for the
# "additional" entries).  Here `signalness` is read from SIGNAL_TRACKNESS.
input_hese = dict(
    standard=dict(
        alert_datetime="NOTICE_DATE",
        trigger_time=["DISCOVERY_DATE", "DISCOVERY_TIME"],
        ra="SRC_RA",
        dec="SRC_DEC",
    ),
    additional=dict(
        ra_dec_error=("SRC_ERROR", "float"),
        ra_dec_error_50=("SRC_ERROR50", "float"),
        record_number=("REVISION", "int"),
        signalness=("SIGNAL_TRACKNESS", "float"),
        n_events=("N_EVENTS", "int"),
        delta_time=("DELTA_T", "float"),
        sigma_time=("SIGMA_T", "float"),
        charge=("CHARGE", "float"),
        p_value=("PVALUE", "float"),
        false_positive=("FALSE_POS", "float"),
    ),
)


def text_to_json_amon(notice, input, notice_type):
    """Convert one parsed AMON notice into a GCN-schema dictionary.

    Calls the general ``conversion.text_to_json`` and then adds or rescales
    the fields which cannot be dealt with by the general function.

    Parameters
    -----------
    notice: dict
        The text notice that is being parsed.
    input: dict
        The mapping between text notices keywords and GCN schema keywords.
        (The name shadows the builtin ``input``; it is kept so existing
        keyword callers continue to work.)
    notice_type: string
        The type of AMON notice.

    Returns
    -------
    dictionary
        A dictionary compliant with the associated schema for the mission.

    Raises
    ------
    KeyError
        If ``notice`` lacks RUN_NUM / EVENT_NUM (or COINC_PAIR / EVENT_NAME
        for the notice types that need them), or if the converted output
        lacks a field that is rescaled here."""
    output_dict = conversion.text_to_json(notice, input)

    output_dict["$schema"] = (
        "https://gcn.nasa.gov/schema/main/gcn/notices/classic/amon/alert.schema.json"
    )
    output_dict["mission"] = "AMON"
    output_dict["notice_type"] = notice_type

    # REVISION is shifted up by one -- presumably the text notices count
    # revisions from 0; confirm against the notice format.
    output_dict["record_number"] += 1

    if notice_type not in ("HESE", "EHE"):
        # Divide by the number of seconds in a 365-day year
        # (presumably FAR is quoted per year in the text notice).
        output_dict["far"] /= 365 * 24 * 60 * 60

    # Built from separate locals rather than nested double quotes inside the
    # f-string, which is only valid syntax on Python 3.12 and later.
    run_num = notice["RUN_NUM"].split()[0]
    event_num = notice["EVENT_NUM"].split()[0]
    output_dict["id"] = [f"{run_num}_{event_num}"]

    if notice_type != "Cascade":
        # Cascade errors are left untouched; every other type is divided by
        # 60 (presumably arcmin -> deg; confirm against the notice format).
        output_dict["ra_dec_error"] /= 60

    if notice_type in ("Astrotrack Gold", "Astrotrack Bronze"):
        output_dict["energy"] *= 1e9
        output_dict["ra_dec_error_50"] /= 60
    elif notice_type == "Neutrino-EM Coincidence":
        # The second whitespace-separated token of COINC_PAIR is stored.
        output_dict["coincidence_with"] = [notice["COINC_PAIR"].split()[1]]
        output_dict["ra_dec_error_50"] /= 60
    elif notice_type in ("Cascade", "EHE", "HESE"):
        output_dict["systematic_included"] = True
        if notice_type == "Cascade":
            output_dict["event_name"] = [notice["EVENT_NAME"].split()[0]]
        if notice_type == "EHE":
            output_dict["containment_probability"] = 0.5
        if notice_type == "HESE":
            output_dict["ra_dec_error_50"] /= 60

    return output_dict


def _select_amon_mapping(type_words):
    """Pick the keyword mapping and notice-type label for a NOTICE_TYPE value.

    `type_words` is the whitespace-split NOTICE_TYPE field.  Returns a
    ``(mapping, notice_type)`` tuple, or ``None`` when the type is not one
    this module knows how to convert."""
    family = type_words[1]
    if family == "Astrotrack":
        return input_gold_bronze, f"Astrotrack {type_words[2]}"
    if family == "Burst":
        return input_burst, family
    if family == "Neutrino-EM":
        return input_coincidence, f"Neutrino-EM {type_words[2]}"
    if family == "Cascade":
        return input_cascade, family
    if family == "ICECUBE":
        subtype = type_words[2]
        if subtype == "EHE":
            return input_ehe, subtype
        if subtype == "HESE":
            return input_hese, subtype
    return None


def create_amon_jsons_one_webpage(link, search_string, output_path, sernum):
    """Parse through all the triggers in `link` and convert them to JSONs

    Every trigger page linked from `link` is downloaded and split into its
    individual notices; each notice with a recognised NOTICE_TYPE is
    converted and written to `AMON_{sernum}_{record_number}.json`.  Notices
    with an unrecognised NOTICE_TYPE are skipped with a warning and do not
    consume a serial number.

    Parameters
    ----------
    link: string
        The webpage with the table of triggers
    search_string: string
        The search string for finding trigger links
    output_path: string
        The path to save the JSONs to
    sernum: int
        The random iterating number with no relations to the data in the JSONs

    Returns
    --------
    sernum: int
        returns sernum for the next function call"""
    import warnings

    prefix = "https://gcn.gsfc.nasa.gov/"
    links_set = conversion.parse_trigger_links(link, prefix, search_string)

    # A distinct loop name keeps the `link` parameter from being shadowed.
    for notice_link in list(links_set):
        data = requests.get(notice_link).text

        # Skip the first line of the page.
        start_idx = data.find("\n") + 1
        while True:
            # Each notice ends at this blank-line marker.
            end_idx = data.find("\n \n ", start_idx)
            if end_idx == -1:
                break

            notice_string = data[start_idx:end_idx].strip()
            notice_message = email.message_from_string(notice_string)
            # NOTE(review): Message.as_string() writes a blank line between
            # headers and body, so this test looks like it is always true
            # and the else branch unreachable -- confirm the intent (perhaps
            # `"\n\n" in notice_string`).  Behaviour is left unchanged here.
            if "\n\n" in notice_message.as_string():
                notice_dict = conversion.parse_notice(notice_string)
            else:
                # get_all() returns None without a default when the notice
                # has no COMMENTS header; fall back to an empty list.
                comment_list = notice_message.get_all("COMMENTS", [])
                comment = "\n".join(item for item in comment_list if item)
                notice_dict = dict(notice_message)
                notice_dict["COMMENTS"] = comment

            type_field = notice_message["NOTICE_TYPE"]
            selection = _select_amon_mapping(type_field.split())
            if selection is None:
                # Previously an unknown type either raised NameError or
                # silently re-wrote the previous notice's output.
                warnings.warn(
                    f"Skipping notice with unrecognised NOTICE_TYPE "
                    f"{type_field!r} in {notice_link}"
                )
            else:
                mapping, notice_type = selection
                output = text_to_json_amon(notice_dict, mapping, notice_type)

                file_name = f"AMON_{sernum}_{output['record_number']}.json"
                with open(os.path.join(output_path, file_name), "w") as f:
                    json.dump(output, f)

                sernum += 1

            # The next notice starts on the line after the slash separator.
            separator_idx = data.find("///////////", end_idx)
            if separator_idx == -1:
                break
            start_idx = data.find("\n", separator_idx)

    return sernum


def create_all_amon_jsons():
    """Create the `amon_jsons` output directory and fill it with the JSONs
    for all AMON triggers.

    The trigger-table pages are processed in a fixed order and the running
    serial number is threaded from one page to the next."""
    output_path = "./output/amon_jsons/"
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # (trigger-table page, search string for the trigger links on that page)
    pages = (
        (
            "https://gcn.gsfc.nasa.gov/amon_icecube_gold_bronze_events.html",
            "notices_amon_g_b/.*amon",
        ),
        (
            "https://gcn.gsfc.nasa.gov/amon_hawc_events.html",
            "notices_amon_hawc/.*amon",
        ),
        (
            "https://gcn.gsfc.nasa.gov/amon_nu_em_coinc_events.html",
            "notices_amon_nu_em/.*amon",
        ),
        (
            "https://gcn.gsfc.nasa.gov/amon_icecube_cascade_events.html",
            "notices_amon_icecube_cascade/.*amon",
        ),
        (
            "https://gcn.gsfc.nasa.gov/amon_ehe_events.html",
            "notices_amon/.*amon",
        ),
        (
            "https://gcn.gsfc.nasa.gov/amon_hese_events.html",
            "notices_amon/.*amon",
        ),
    )

    sernum = 1
    for page_url, pattern in pages:
        sernum = create_amon_jsons_one_webpage(
            page_url, pattern, output_path, sernum
        )