From 2d775fbf84cc1e80ed1b86e98f0728ffd65cc15e Mon Sep 17 00:00:00 2001
From: Gavin Medley <gavin.medley@lasp.colorado.edu>
Date: Wed, 9 Oct 2024 17:49:31 -0600
Subject: [PATCH] Add documentation about raw vs normal values for parsed
 parameters Add example documentation for each parameter type Add support for
 float and string encoded enumerated lookups

---
 docs/source/changelog.md           |   1 +
 docs/source/users.md               | 150 +++++++++++++++++++++++++++++
 space_packet_parser/calibrators.py |   8 +-
 space_packet_parser/parameters.py  |  35 +++++--
 tests/unit/test_xtcedef.py         |  73 +++++++++++++-
 5 files changed, 253 insertions(+), 14 deletions(-)

diff --git a/docs/source/changelog.md b/docs/source/changelog.md
index 352dc00..4c90fdc 100644
--- a/docs/source/changelog.md
+++ b/docs/source/changelog.md
@@ -7,6 +7,7 @@ Release notes for the `space_packet_parser` library
 
 ### v5.1.0 (unreleased)
 - BUGFIX: Fix kbps calculation in packet generator for showing progress.
+- Add support for string and float encoded enumerated lookup parameters.
 
 ### v5.0.1 (released)
 - BUGFIX: Allow raw_value representation for enums with falsy raw values. Previously these defaulted to the enum label.
diff --git a/docs/source/users.md b/docs/source/users.md
index 1903140..67f87c0 100644
--- a/docs/source/users.md
+++ b/docs/source/users.md
@@ -1,3 +1,5 @@
+from space_packet_parser.definitions import XtcePacketDefinition
+
 # User Documentation (Getting Started)
 ## Installation
 This package is distributed via PyPI. To install it with pip, run:
@@ -39,6 +41,154 @@ with packet_file.open("rb") as binary_data:
 We aim to provide examples of usage patterns. Please see the `examples` directory in the GitHub repo. If there is 
 a specific example you want to see demonstrated, please open a GitHub Issue or Discussion for support.
 
+## Packet Objects
+The object returned from the `packet_generator` is a `CCSDSPacket` (unless you're yielding parsing 
+exceptions for debugging). This object subclasses a Python `dict` and behaves like one. To retrieve
+a parameter value from the yielded packet, you can iterate over its `items()` or you can access individual parameters 
+by name. 
+
+```python
+packet = next(packet_definition.packet_generator(data))
+my_param = packet["MY_PARAM_NAME"]
+all_param_names = list(packet.keys())
+```
+
+## Parameter Objects
+
+The parameter values within the packet are subclasses of normal python data types: 
+`int`, `float`, `str`, `bool` and `bytes`. The objects behave exactly as the python data types except that they all 
+contain a `raw_value` attribute, which contains the value generated by the data encoding parser, before being passed 
+through any calibrators, enum lookups, string parsing, or boolean evaluation.
+
+```python
+print(my_param)  # prints the most derived value available - str, int, float, bytes, or bool
+print(my_param.raw_value)  # prints the "raw" encoded value parsed by the low level data encoding
+```
+
+Space Packet Parser returns the following types for parameters within a packet. They behave just as their Python 
+base classes (`int`, `float`, `str`, `bytes`, and `bool` respectively) except that each contains a `raw_value`
+attribute that contains the encoded value before applying any calibration or other derived processing of the value.
+The primary value of each parameter type is the fully parsed (calibrated, enumerated, string-parsed, etc.) value.
+- `IntParameter`
+- `FloatParameter`
+- `StrParameter`
+- `BinaryParameter`
+- `BoolParameter`
+
+### Numeric Calibration
+Int and float parameters can be calibrated on the fly during decoding. These calibrators are defined on the data 
+encoding XTCE element and can transform the raw encoded value to a calibrated value, e.g. via a polynomial. Calibrated 
+values are always floats, even if the raw encoded value is an integer.
+
+For example,
+```xml
+<xtce:IntegerDataEncoding xmlns:xtce="http://www.omg.org/space/xtce" sizeInBits="16" encoding="unsigned">
+    <xtce:DefaultCalibrator>
+        <xtce:PolynomialCalibrator>
+            <xtce:Term exponent="1" coefficient="1.215500e-02"/>
+            <xtce:Term exponent="0" coefficient="2.540000e+00"/>
+        </xtce:PolynomialCalibrator>
+    </xtce:DefaultCalibrator>
+</xtce:IntegerDataEncoding>
+```
+In this encoding definition, the raw encoded value is a 16-bit unsigned integer that is calibrated by a polynomial
+to produce a calibrated value, which is always a float. In this case `value = 0.012155 * raw_value + 2.54`.
+
+### String Parsing
+Strings are encoded as a buffer of determined size (either fixed length or dynamic, based on a previous parameter). The
+raw buffer includes any additional string data such as a leading size integer or a termination character. If a
+leading size or termination character is specified in the XTCE definition, the parsed string value is returned as 
+the value of the parameter and the buffer is returned as the `raw_value`. If no termination character or leading size
+is specified, the value and `raw_value` are the same and both refer to the raw string buffer.
+
+For example,
+```xml
+<xtce:StringDataEncoding xmlns:xtce="http://www.omg.org/space/xtce">
+    <xtce:Variable maxSizeInBits="32">
+        <xtce:DynamicValue>
+            <xtce:ParameterInstanceRef parameterRef="STR_SIZE"/>
+            <xtce:LinearAdjustment intercept="27" slope="8"/>
+        </xtce:DynamicValue>
+        <xtce:LeadingSize sizeInBitsOfSizeTag="3"/>
+    </xtce:Variable>
+</xtce:StringDataEncoding>
+```
+In this encoding definition, the size of the raw string buffer (the number of bits it occupies in the packet) is
+derived from a parameter named `STR_SIZE`. The value stored in `STR_SIZE` is a number of bytes, so it is multiplied by
+8 and a constant base length of 27 bits is added: `8 * STR_SIZE + 27`. So if `STR_SIZE` encodes the value 4, the raw
+string buffer width in the packet is 59 bits. This is an odd size for a string because it is not an integer number of
+bytes, but that's because it includes a 3-bit unsigned int in front of the string data that specifies the size of the
+string, in bits, making the raw string buffer `[3 bit uint | 7 bytes]`.
+
+In this case, the `raw_value` of the parameter will contain the full string buffer as an 8 byte string, 
+padded on the RHS with 5 zero bits. We have to pad it because you cannot create a byte string from a non-integer 
+number of bytes (59bits). The `value` of the parameter will contain the fully parsed `str` object based on the value 
+of the leading size. If the leading size uint3 represents the integer 4, the `value` of the parameter will be a `str` 
+that is made of the first 4 bytes of data in the raw buffer following the leading size.
+
+Termination characters work similarly.
+```xml
+<xtce:StringDataEncoding encoding="UTF-16BE" xmlns:xtce="http://www.omg.org/space/xtce">
+    <xtce:SizeInBits>
+        <xtce:Fixed>
+            <xtce:FixedValue>32</xtce:FixedValue>
+        </xtce:Fixed>
+        <xtce:TerminationChar>0058</xtce:TerminationChar>
+    </xtce:SizeInBits>
+</xtce:StringDataEncoding>
+```
+In this case, the raw buffer is a fixed length (32bits). 
+The parsed `StrParameter.raw_value` will be the full string buffer, including the termination 
+character and any additional following bytes. The `value` of the parameter will be a `str` based on all the encoded 
+bytes preceding the termination character. In this case, the raw string buffer _will_ always be an integer number of 
+bytes since a termination character is always an integer number of bytes, so no padding of the raw value is required.
+
+### Enumerated Lookups
+Enums are defined by lookup tables in the XTCE, which are converted to dictionaries internally. Once the raw value 
+from the data encoding is parsed, a lookup is made to the lookup table and the final string label is returned. 
+Note that the final label from enumerated lookups is always a string. The raw value used in the lookup table is 
+interpreted based on the data encoding for the parameter. Integer encoded enum values are ints, float encoded values
+are floats, and string encoded values are `bytes`, matched against the raw string buffer from the encoding.
+
+Only raw values may be used for enum lookups. Calibrated numeric values cannot be used for enum lookups from numeric
+encodings, and for string encoded parameters only raw string buffers may be used (not fully parsed strings). Float
+encoded lookups compare for exact equality, so enumeration values should be exactly representable in the encoding.
+
+For example,
+```xml
+<xtce:EnumeratedParameterType xmlns:xtce="http://www.omg.org/space/xtce" name="TEST_ENUM_Type">
+    <xtce:UnitSet/>
+    <xtce:IntegerDataEncoding sizeInBits="8" encoding="unsigned"/>
+    <xtce:EnumerationList>
+        <xtce:Enumeration label="OP_LOW" value="0"/>
+        <xtce:Enumeration label="OP_HIGH" value="7"/>
+    </xtce:EnumerationList>
+</xtce:EnumeratedParameterType>
+```
+Here, the encoded value (`raw_value`) is a uint8 integer, but the value returned for an enumerated parameter type will
+be a `StrParameter` containing the label string associated with the integer value.
+
+### Boolean Evaluation
+Booleans behave nicely for integers and floats where zero is False and everything else is True. For string and binary
+encoded values, the only falsy value is an empty string, which is kind of silly to encode. XTCE is not specific on the 
+interpretation of string and binary encoded values for boolean parameters and there is no generally accepted 
+interpretation, so we default to Python's `bool`, which interprets any non-empty string as True. 
+
+Only raw values may be used for boolean evaluation. Calibrated values are not considered.
+
+For example,
+```xml
+<xtce:BooleanParameterType xmlns:xtce="http://www.omg.org/space/xtce" name="TEST_PARAM_Type">
+    <xtce:UnitSet>
+        <xtce:Unit>smoot</xtce:Unit>
+    </xtce:UnitSet>
+    <xtce:IntegerDataEncoding encoding="unsigned" sizeInBits="1"/>
+</xtce:BooleanParameterType>
+```
+Here, the encoded value (`raw_value`) is a single bit interpreted as an integer, but the value returned for a boolean
+parameter type will be a `BoolParameter`, evaluated over the encoded integer value: `False` if the integer is 0,
+`True` otherwise.
+
 ## Parsing from a Socket
 The input data object to `XtcePacketDefinition.packet_generator` need only be a binary filelike object from which 
 bytes can be read. This means the packet generator is not limited to parsing data from files! In an effort to support
diff --git a/space_packet_parser/calibrators.py b/space_packet_parser/calibrators.py
index 59436bb..f0abacb 100644
--- a/space_packet_parser/calibrators.py
+++ b/space_packet_parser/calibrators.py
@@ -32,7 +32,7 @@ def from_calibrator_xml_element(cls, element: ElementTree.Element, ns: dict) ->
         return NotImplemented
 
     @abstractmethod
-    def calibrate(self, uncalibrated_value: Union[int, float]) -> Union[int, float]:
+    def calibrate(self, uncalibrated_value: Union[int, float]) -> float:
         """Takes an integer-encoded or float-encoded value and returns a calibrated version.
 
         Parameters
@@ -42,7 +42,7 @@ def calibrate(self, uncalibrated_value: Union[int, float]) -> Union[int, float]:
 
         Returns
         -------
-        : Union[int, float]
+        : float
             Calibrated value
         """
         raise NotImplementedError
@@ -345,7 +345,7 @@ def from_context_calibrator_xml_element(cls, element: ElementTree.Element, ns: d
 
         return cls(match_criteria=match_criteria, calibrator=calibrator)
 
-    def calibrate(self, parsed_value: Union[int, float]) -> Union[int, float]:
+    def calibrate(self, parsed_value: Union[int, float]) -> float:
         """Wrapper method for the internal `Calibrator.calibrate`
 
         Parameters
@@ -355,7 +355,7 @@ def calibrate(self, parsed_value: Union[int, float]) -> Union[int, float]:
 
         Returns
         -------
-        : Union[int, float]
+        : float
             Calibrated value
         """
         return self.calibrator.calibrate(parsed_value)
diff --git a/space_packet_parser/parameters.py b/space_packet_parser/parameters.py
index 5c0e619..6b752da 100644
--- a/space_packet_parser/parameters.py
+++ b/space_packet_parser/parameters.py
@@ -210,11 +210,11 @@ def from_parameter_type_xml_element(cls, element: ElementTree.Element, ns: dict)
         name = element.attrib['name']
         unit = cls.get_units(element, ns)
         encoding = cls.get_data_encoding(element, ns)
-        enumeration = cls.get_enumeration_list_contents(element, ns)
+        enumeration = cls.get_enumeration_list_contents(element, encoding, ns)
         return cls(name, encoding, enumeration=enumeration, unit=unit)
 
     @staticmethod
-    def get_enumeration_list_contents(element: ElementTree.Element, ns: dict) -> dict:
+    def get_enumeration_list_contents(element: ElementTree.Element, encoding: encodings.DataEncoding, ns: dict) -> dict:
         """Finds the <xtce:EnumerationList> element child of an <xtce:EnumeratedParameterType> and parses it,
         returning a dict. This method is confusingly named as if it might return a list. Sorry, XML and python
         semantics are not always compatible. It's called an enumeration list because the XML element is called
@@ -224,6 +224,8 @@ def get_enumeration_list_contents(element: ElementTree.Element, ns: dict) -> dic
         ----------
         element : ElementTree.Element
             The XML element from which to search for EnumerationList tags
+        encoding: encodings.DataEncoding
+            The data encoding informs how to interpret the keys in the enumeration list (int, float, or str).
         ns : dict
             XML namespace dict
 
@@ -235,10 +237,27 @@ def get_enumeration_list_contents(element: ElementTree.Element, ns: dict) -> dic
         if enumeration_list is None:
             raise ValueError("An EnumeratedParameterType must contain an EnumerationList.")
 
-        return {
-            int(el.attrib['value']): el.attrib['label']
-            for el in enumeration_list.iterfind('xtce:Enumeration', ns)
-        }
+        if isinstance(encoding, encodings.IntegerDataEncoding):
+            return {
+                int(el.attrib['value']): el.attrib['label']
+                for el in enumeration_list.iterfind('xtce:Enumeration', ns)
+            }
+
+        if isinstance(encoding, encodings.FloatDataEncoding):
+            return {
+                float(el.attrib['value']): el.attrib['label']
+                for el in enumeration_list.iterfind('xtce:Enumeration', ns)
+            }
+
+        if isinstance(encoding, encodings.StringDataEncoding):
+            return {
+                bytes(el.attrib['value'], encoding=encoding.encoding): el.attrib['label']
+                for el in enumeration_list.iterfind('xtce:Enumeration', ns)
+            }
+
+        raise ValueError(f"Detected unsupported encoding type {encoding} for an EnumeratedParameterType."
+                         "Supported encodings for enums are FloatDataEncoding, IntegerDataEncoding, "
+                         "and StringDataEncoding.")
 
     def parse_value(self, packet: packets.CCSDSPacket, **kwargs) -> packets.StrParameter:
         """Using the parameter type definition and associated data encoding, parse a value from a bit stream starting
@@ -256,8 +275,8 @@ def parse_value(self, packet: packets.CCSDSPacket, **kwargs) -> packets.StrParam
             Resulting enum label associated with the (usually integer-)encoded data value.
         """
         raw_enum_value = super().parse_value(packet, **kwargs).raw_value
-        # Note: The enum lookup only operates on raw values. This is specified in 4.3.2.4.3.6 of the XTCE spec "
-        # CCSDS 660.1-G-2
+        # Note: The enum lookup only operates on raw values. This is specified in Fig 4-43 in
+        # section 4.3.2.4.3.6 of the XTCE spec CCSDS 660.1-G-2
         # Note, this doesn't prohibit a user from defining a calibrator on an encoding that is used for an enum lookup.
         # It just means that the calibrated derived value doesn't get used for the lookup, nor will the calibrated
         # value be represented in the returned as part of the returned enum (string) parameter
diff --git a/tests/unit/test_xtcedef.py b/tests/unit/test_xtcedef.py
index 271f395..fb2e5ba 100644
--- a/tests/unit/test_xtcedef.py
+++ b/tests/unit/test_xtcedef.py
@@ -1726,6 +1726,51 @@ def test_float_parameter_parsing(parameter_type, raw_data, expected):
                                             # NOTE: Duplicate final value is on purpose to make sure we handle that case
                                             enumeration={0: 'BOOT_POR', 1: 'BOOT_RETURN', 2: 'OP_LOW', 3: 'OP_HIGH',
                                                          4: 'OP_HIGH'})),
+        ("""
+<xtce:EnumeratedParameterType xmlns:xtce="http://www.omg.org/space/xtce" name="TEST_ENUM_Type">
+    <xtce:UnitSet/>
+    <xtce:FloatDataEncoding sizeInBits="32" encoding="IEEE-754"/>
+    <xtce:EnumerationList>
+        <xtce:Enumeration label="BOOT_POR" value="0.0"/>
+        <xtce:Enumeration label="BOOT_RETURN" value="1.1"/>
+        <xtce:Enumeration label="OP_LOW" value="2.2"/>
+        <xtce:Enumeration label="OP_HIGH" value="3.3"/>
+        <xtce:Enumeration label="OP_HIGH" value="4.4"/>
+    </xtce:EnumerationList>
+</xtce:EnumeratedParameterType>
+""",
+         parameters.EnumeratedParameterType(name='TEST_ENUM_Type',
+                                            encoding=encodings.FloatDataEncoding(size_in_bits=32, encoding='IEEE-754'),
+                                            # NOTE: Duplicate final value is on purpose to make sure we handle that case
+                                            enumeration={0.0: 'BOOT_POR', 1.1: 'BOOT_RETURN', 2.2: 'OP_LOW', 3.3: 'OP_HIGH',
+                                                         4.4: 'OP_HIGH'})),
+        ("""
+<xtce:EnumeratedParameterType xmlns:xtce="http://www.omg.org/space/xtce" name="TEST_ENUM_Type">
+    <xtce:UnitSet/>
+    <xtce:StringDataEncoding>
+        <xtce:SizeInBits>
+            <xtce:Fixed>
+                <xtce:FixedValue>16</xtce:FixedValue>
+            </xtce:Fixed>
+        </xtce:SizeInBits>
+    </xtce:StringDataEncoding>
+    <xtce:EnumerationList>
+        <xtce:Enumeration label="BOOT_POR" value="AA"/>
+        <xtce:Enumeration label="BOOT_RETURN" value="BB"/>
+        <xtce:Enumeration label="OP_LOW" value="CC"/>
+        <xtce:Enumeration label="OP_HIGH" value="DD"/>
+        <xtce:Enumeration label="OP_HIGH" value="EE"/>
+    </xtce:EnumerationList>
+</xtce:EnumeratedParameterType>
+""",
+         parameters.EnumeratedParameterType(name='TEST_ENUM_Type',
+                                            encoding=encodings.StringDataEncoding(fixed_raw_length=16),
+                                            # NOTE: Duplicate final value is on purpose to make sure we handle that case
+                                            enumeration={b"AA": 'BOOT_POR',
+                                                         b"BB": 'BOOT_RETURN',
+                                                         b"CC": 'OP_LOW',
+                                                         b"DD": 'OP_HIGH',
+                                                         b"EE": 'OP_HIGH'})),
     ]
 )
 def test_enumerated_parameter_type(xml_string: str, expectation):
@@ -1769,15 +1814,39 @@ def test_enumerated_parameter_type(xml_string: str, expectation):
          0,
          'USES_UNCALIBRATED_VALUE'),
         (parameters.EnumeratedParameterType(
-            'TEST_FLOAT_ENUM',
+            'TEST_NEGATIVE_ENUM',
             encodings.IntegerDataEncoding(16, 'signed'), {-42: 'VAL_LOW'}),
          0b1111111111010110.to_bytes(length=2, byteorder='big'),
          -42,
          'VAL_LOW'),
+        (parameters.EnumeratedParameterType(name='TEST_FLOAT_ENUM',
+                                            encoding=encodings.FloatDataEncoding(
+                                                size_in_bits=32,
+                                                encoding='IEEE-754',
+                                                byte_order="mostSignificantByteFirst"),
+                                            # NOTE: Duplicate final value is on purpose to make sure we handle that case
+                                            enumeration={0.0: 'BOOT_POR', 3.5: 'BOOT_RETURN', 2.2: 'OP_LOW',
+                                                         3.3: 'OP_HIGH',
+                                                         4.4: 'OP_HIGH'}),
+         0b01000000011000000000000000000000.to_bytes(length=4, byteorder='big'),
+         3.5,
+         "BOOT_RETURN"
+         ),
+        (parameters.EnumeratedParameterType(name='TEST_ENUM_Type',
+                                            encoding=encodings.StringDataEncoding(fixed_raw_length=16),
+                                            # NOTE: Duplicate final value is on purpose to make sure we handle that case
+                                            enumeration={b"AA": 'BOOT_POR',
+                                                         b"BB": 'BOOT_RETURN',
+                                                         b"CC": 'OP_LOW',
+                                                         b"DD": 'OP_HIGH',
+                                                         b"EE": 'OP_HIGH'}),
+         b'CCXXXX',
+         b'CC',
+         "OP_LOW")
     ]
 )
 def test_enumerated_parameter_parsing(parameter_type, raw_data, expected_raw, expected):
-    """"Test parsing enumerated parameters"""
+    """Test parsing enumerated parameters"""
     packet = packets.CCSDSPacket(raw_data=raw_data)
     value = parameter_type.parse_value(packet)
     assert value == expected