CoDICE L1 decompression and unpacking data updates #762
Changes from 13 commits
@@ -27,16 +27,22 @@
 from imap_processing.cdf.utils import met_to_j2000ns
 from imap_processing.codice import constants
 from imap_processing.codice.codice_l0 import decom_packets
+from imap_processing.codice.decompress import decompress
 from imap_processing.codice.utils import CODICEAPID, add_metadata_to_array
 from imap_processing.utils import group_by_apid, sort_by_time

 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)

-# TODO: Decom data arrays need to be decompressed
 # TODO: In decommutation, how to have a variable length data and then a checksum
 #       after it? (Might be fixed with new XTCE script updates)
 # TODO: Add support for decomming multiple APIDs from a single file
+# TODO: Add these as variables in CDF: SPIN_PERIOD, ST_BIAS_GAIN_MODE,
+#       SW_BIAS_GAIN_MODE, RGFO_HALF_SPIN, NSO_HALF_SPIN, DATA_QUALITY
+# TODO: Use new packet_file_to_dataset() function to simplify things
+# TODO: Determine what should go in event data CDF and how it should be
+#       structured.
+# TODO: Make sure CDF attributes match expected nomenclature


 class CoDICEL1aPipeline:
@@ -69,8 +75,10 @@ class CoDICEL1aPipeline:
         Retrieve the acquisition times via the Lo stepping table.
     get_esa_sweep_values()
         Retrieve the ESA sweep values.
-    unpack_science_data()
-        Make 4D L1a data product from the decompressed science data.
+    unpack_hi_science_data()
+        Decompress, unpack, and restructure CoDICE-Hi data arrays.
+    unpack_lo_science_data()
+        Decompress, unpack, and restructure CoDICE-Lo data arrays.
     """

     def __init__(self, table_id: int, plan_id: int, plan_step: int, view_id: int):
@@ -92,8 +100,11 @@ def configure_data_products(self, apid: int) -> None:
         config = constants.DATA_PRODUCT_CONFIGURATIONS.get(apid)  # type: ignore[call-overload]
         self.num_counters = config["num_counters"]
         self.num_energy_steps = config["num_energy_steps"]
+        self.num_spin_sectors = config["num_spin_sectors"]
+        self.num_positions = config["num_positions"]
         self.variable_names = config["variable_names"]
         self.dataset_name = config["dataset_name"]
+        self.instrument = config["instrument"]

     def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset:
         """
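The three new configuration fields (`num_spin_sectors`, `num_positions`, `instrument`) drive the reshaping logic later in the pipeline. Below is a minimal sketch of how such an entry might look and how its sizes constrain the packet contents; the APID and every value are illustrative placeholders, not the real entries in `constants.DATA_PRODUCT_CONFIGURATIONS`:

```python
# Hypothetical configuration entry; the keys mirror those read in
# configure_data_products(), but the values are made up for illustration.
EXAMPLE_CONFIGURATIONS = {
    0x123: {  # placeholder APID
        "num_counters": 16,
        "num_energy_steps": 128,
        "num_spin_sectors": 12,
        "num_positions": 24,
        "variable_names": [f"species_{i:02d}" for i in range(16)],
        "dataset_name": "imap_codice_l1a_lo-sw-species",
        "instrument": "lo",
    },
}

config = EXAMPLE_CONFIGURATIONS[0x123]

# Total number of decompressed values one packet must supply for this product
expected_length = (
    config["num_counters"]
    * config["num_positions"]
    * config["num_spin_sectors"]
    * config["num_energy_steps"]
)
print(expected_length)  # 16 * 24 * 12 * 128 = 589824
```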
@@ -121,11 +132,23 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset

         # Define coordinates
         epoch = xr.DataArray(
-            met_to_j2000ns(met),  # TODO: Fix after SIT-3 (see note below)
+            [met_to_j2000ns(met)],
             name="epoch",
             dims=["epoch"],
             attrs=cdf_attrs.get_variable_attributes("epoch"),
         )
+        inst_az = xr.DataArray(
+            np.arange(self.num_positions),
+            name="inst_az",
+            dims=["inst_az"],
+            attrs=cdf_attrs.get_variable_attributes("inst_az_attrs"),
+        )
+        spin_sector = xr.DataArray(
+            np.arange(self.num_spin_sectors),
+            name="spin_sector",
+            dims=["spin_sector"],
+            attrs=cdf_attrs.get_variable_attributes("spin_sector_attrs"),
+        )
         energy_steps = xr.DataArray(
             np.arange(self.num_energy_steps),
             name="energy",
@@ -145,6 +168,8 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset
         dataset = xr.Dataset(
             coords={
                 "epoch": epoch,
+                "inst_az": inst_az,
+                "spin_sector": spin_sector,
                 "energy": energy_steps,
                 "energy_label": energy_label,
             },
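For reference, here is a self-contained sketch of the coordinate layout this produces, using toy sizes and omitting the CDF attributes and the `energy_label` coordinate:

```python
import numpy as np
import xarray as xr

num_positions, num_spin_sectors, num_energy_steps = 4, 3, 5  # toy sizes

epoch = xr.DataArray([0], name="epoch", dims=["epoch"])
inst_az = xr.DataArray(np.arange(num_positions), name="inst_az", dims=["inst_az"])
spin_sector = xr.DataArray(
    np.arange(num_spin_sectors), name="spin_sector", dims=["spin_sector"]
)
energy = xr.DataArray(np.arange(num_energy_steps), name="energy", dims=["energy"])

dataset = xr.Dataset(
    coords={
        "epoch": epoch,
        "inst_az": inst_az,
        "spin_sector": spin_sector,
        "energy": energy,
    }
)
print(dict(dataset.sizes))  # {'epoch': 1, 'inst_az': 4, 'spin_sector': 3, 'energy': 5}
```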
@@ -153,25 +178,34 @@ def create_science_dataset(self, met: np.int64, data_version: str) -> xr.Dataset

         # Create a data variable for each counter
         for variable_data, variable_name in zip(self.data, self.variable_names):
-            # TODO: Currently, cdflib doesn't properly write/read CDF files that
-            #       have a single epoch value. To get around this for now, use
-            #       two epoch values and reshape accordingly. Revisit this after
-            #       SIT-3. See https://github.com/MAVENSDC/cdflib/issues/268
-            variable_data_arr = np.array(list(variable_data) * 2, dtype=int).reshape(
-                2, self.num_energy_steps
-            )
+            # Data arrays are structured depending on the instrument
+            if self.instrument == "lo":
+                variable_data_arr = np.array(variable_data).reshape(
+                    1, self.num_positions, self.num_spin_sectors, self.num_energy_steps
+                )
+                dims = ["epoch", "inst_az", "spin_sector", "energy"]
+            elif self.instrument == "hi":
+                variable_data_arr = np.array(variable_data).reshape(
+                    1, self.num_energy_steps, self.num_positions, self.num_spin_sectors
+                )
+                dims = ["epoch", "energy", "inst_az", "spin_sector"]

             # Get the CDF attributes
             cdf_attrs_key = (
                 f"{self.dataset_name.split('imap_codice_l1a_')[-1]}-{variable_name}"
             )
+            attrs = cdf_attrs.get_variable_attributes(cdf_attrs_key)

             # Create the CDF data variable
             dataset[variable_name] = xr.DataArray(
                 variable_data_arr,
                 name=variable_name,
-                dims=["epoch", "energy"],
-                attrs=cdf_attrs.get_variable_attributes(cdf_attrs_key),
+                dims=dims,
+                attrs=attrs,
             )

         # Add ESA Sweep Values and acquisition times (lo only)
-        if "_lo_" in self.dataset_name:
+        if self.instrument == "lo":
             self.get_esa_sweep_values()
             self.get_acquisition_times()
             dataset["esa_sweep_values"] = xr.DataArray(
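Since numpy's `reshape` only reinterprets the flat value sequence, the two branches encode the fact that the flattened telemetry stream is ordered differently for CoDICE-Lo and CoDICE-Hi. A toy demonstration of the two orderings (all sizes are placeholders):

```python
import numpy as np

num_positions, num_spin_sectors, num_energy_steps = 4, 3, 5  # toy sizes
counts = np.arange(num_positions * num_spin_sectors * num_energy_steps)

# CoDICE-lo ordering: a leading length-1 epoch axis, then position,
# spin sector, energy
lo_arr = counts.reshape(1, num_positions, num_spin_sectors, num_energy_steps)
lo_dims = ["epoch", "inst_az", "spin_sector", "energy"]

# CoDICE-hi ordering: energy first, then position, then spin sector
hi_arr = counts.reshape(1, num_energy_steps, num_positions, num_spin_sectors)
hi_dims = ["epoch", "energy", "inst_az", "spin_sector"]

# Same element count either way; only the axis interpretation differs
assert lo_arr.shape == (1, 4, 3, 5)
assert hi_arr.shape == (1, 5, 4, 3)
```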
@@ -264,31 +298,46 @@ def get_esa_sweep_values(self) -> None:

     def unpack_science_data(self, science_values: str) -> None:
         """
-        Unpack the science data from the packet.
+        Decompress, unpack, and restructure science data arrays.

-        For LO SW Species Counts data, the science data within the packet is a
-        blob of compressed values of length 2048 bits (16 species * 128 energy
-        levels). These data need to be divided up by species so that each
-        species can have their own data variable in the L1A CDF file.
+        The science data within the packet is a compressed binary string of
+        values. These data need to be divided up by species or priorities
+        (referred to generically as "counters"), and re-arranged into 3D
+        arrays representing spin sectors, positions, and energies (the order
+        of which depends on the instrument).

         Parameters
         ----------
         science_values : str
             A string of binary data representing the science values of the data.
         """
         self.compression_algorithm = constants.LO_COMPRESSION_ID_LOOKUP[self.view_id]
         self.collapse_table_id = constants.LO_COLLAPSE_TABLE_ID_LOOKUP[self.view_id]

-        # TODO: Turn this back on after SIT-3
-        #       For SIT-3, just create appropriate length data arrays of all ones
-        # Divide up the data by the number of priorities or species
-        # science_values = packets[0].data["DATA"].raw_value
-        # num_bits = len(science_values)
-        # chunk_size = len(science_values) // self.num_counters
-        # self.data = [
-        #     science_values[i : i + chunk_size] for i in range(0, num_bits, chunk_size)
-        # ]
-        self.data = [["1"] * 128] * self.num_counters
+        # Decompress the binary string into a list of integers
+        science_values_decompressed = decompress(
+            science_values, self.compression_algorithm
+        )
+
+        # Re-arrange the counter data
+        # For CoDICE-lo, each counter is a 3D array with a shape representing
+        # [<num_positions>, <num_spin_sectors>, <num_energy_steps>]
+        if self.instrument == "lo":
+            self.data = np.array(science_values_decompressed, dtype=np.uint).reshape(
+                self.num_counters,
+                self.num_positions,
+                self.num_spin_sectors,
+                self.num_energy_steps,
+            )
+
+        # For CoDICE-hi, each counter is a 3D array with a shape representing
+        # [<num_energy_steps>, <num_positions>, <num_spin_sectors>]
+        elif self.instrument == "hi":
+            self.data = np.array(science_values_decompressed, dtype=np.uint).reshape(
+                self.num_counters,
+                self.num_energy_steps,
+                self.num_positions,
+                self.num_spin_sectors,
+            )

     def create_event_dataset(
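A worked toy version of the unpack step, with a simple integer range standing in for the output of `decompress()` and sizes far smaller than any real CoDICE product:

```python
import numpy as np

# Placeholder sizes: 2 counters x 2 positions x 3 spin sectors x 4 energy steps
num_counters, num_positions, num_spin_sectors, num_energy_steps = 2, 2, 3, 4

# Stand-in for the list of integers returned by decompress()
science_values_decompressed = list(
    range(num_counters * num_positions * num_spin_sectors * num_energy_steps)
)

# The lo-style reshape from unpack_science_data()
data = np.array(science_values_decompressed, dtype=np.uint).reshape(
    num_counters, num_positions, num_spin_sectors, num_energy_steps
)

# Each slice along axis 0 is one counter's 3D array, which is later zipped
# with a variable name in create_science_dataset()
for counter in data:
    assert counter.shape == (num_positions, num_spin_sectors, num_energy_steps)
```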
@@ -334,9 +383,6 @@ def create_event_dataset(
             attrs=cdf_attrs.get_global_attributes(dataset_name),
         )

-        # TODO: Determine what should go in event data CDF and how it should be
-        #       structured.
-
         return dataset
@@ -385,13 +431,15 @@ def create_hskp_dataset(
         )

         # TODO: Change 'TBD' catdesc and fieldname
-        # Once housekeeping packet definition file is re-generated with updated
-        # version of space_packet_parser, can get fieldname and catdesc info via:
-        # for key, value in (packet.header | packet.data).items():
-        #     fieldname = value.short_description
-        #     catdesc = value.long_description
-        # I am holding off making this change until I acquire updated housekeeping
-        # packets/validation data that match the latest telemetry definitions
+        #       Once housekeeping packet definition file is re-generated with
+        #       updated version of space_packet_parser, can get fieldname and
+        #       catdesc info via:
+        #           for key, value in (packet.header | packet.data).items():
+        #               fieldname = value.short_description
+        #               catdesc = value.long_description
+        #       I am holding off making this change until I acquire updated
+        #       housekeeping packets/validation data that match the latest
+        #       telemetry definitions

         for key, value in metadata_arrays.items():
             attrs = cdf_attrs.get_variable_attributes("codice_support_attrs")
             attrs["CATDESC"] = "TBD"
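The deferred change described in that comment might eventually look like the outline below. This assumes, as the comment does, that the regenerated packet definition exposes `short_description` and `long_description` on each parsed value; `build_hskp_attrs` and `base_attrs` are hypothetical names used only for illustration:

```python
from typing import Any, Mapping


def build_hskp_attrs(packet_fields: Mapping[str, Any], base_attrs: dict) -> dict:
    """Sketch: derive CATDESC/FIELDNAM per field instead of hardcoding 'TBD'."""
    attrs_by_field = {}
    for key, value in packet_fields.items():
        attrs = dict(base_attrs)  # copy so each field gets its own attributes
        attrs["FIELDNAM"] = getattr(value, "short_description", None) or "TBD"
        attrs["CATDESC"] = getattr(value, "long_description", None) or "TBD"
        attrs_by_field[key] = attrs
    return attrs_by_field
```

Such a helper could then be fed `packet.header | packet.data` and the `codice_support_attrs` base attributes in place of the hardcoded "TBD" loop above.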
@@ -457,8 +505,6 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
     dataset : xarray.Dataset
         The ``xarray`` dataset containing the science data and supporting metadata.
     """
-    # TODO: Use new packet_file_to_dataset() function to simplify things
-
     # Decom the packets, group data by APID, and sort by time
     packets = decom_packets(file_path)
     grouped_data = group_by_apid(packets)
@@ -496,7 +542,7 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:

         # Determine the start time of the packet
         met = packets[0].data["ACQ_START_SECONDS"].raw_value
-        met = [met, met + 1]  # TODO: Remove after cdflib fix

         # Extract the data
         science_values = packets[0].data["DATA"].raw_value
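With the two-epoch workaround for MAVENSDC/cdflib#268 gone, a packet's single MET value now maps directly to a length-1 epoch coordinate. A minimal sketch, assuming `met_to_j2000ns` accepts a single MET in seconds:

```python
from imap_processing.cdf.utils import met_to_j2000ns

met = 123456789  # placeholder MET value, in seconds
epoch_values = [met_to_j2000ns(met)]  # length-1 epoch; no duplicated entries
```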
@@ -510,4 +556,5 @@ def process_codice_l1a(file_path: Path, data_version: str) -> xr.Dataset:
         dataset = pipeline.create_science_dataset(met, data_version)

+    logger.info(f"\nFinal data product:\n{dataset}\n")

     return dataset
Review comment: I'm planning to provide better labels/descriptions for these in an upcoming PR where I address nomenclature.