diff --git a/pylintrc b/pylintrc index 5746484..399d504 100644 --- a/pylintrc +++ b/pylintrc @@ -28,7 +28,7 @@ unsafe-load-any-extension = no # A comma-separated list of package or module names from where C extensions may # be loaded. Extensions are loading into the active Python interpreter and may # run arbitrary code -extension-pkg-whitelist = +extension-pkg-allow-list = lxml.etree [MESSAGES CONTROL] diff --git a/space_packet_parser/canonicalized_strings.py b/space_packet_parser/canonicalized_strings.py new file mode 100644 index 0000000..e69de29 diff --git a/space_packet_parser/xtcedef.py b/space_packet_parser/xtcedef.py index bc5bc64..28c17ae 100644 --- a/space_packet_parser/xtcedef.py +++ b/space_packet_parser/xtcedef.py @@ -969,7 +969,27 @@ def read(self, format_string: str, update_position: bool = True) -> Union[int, f : Union[int, float, str, bytes] Value read from the packet data according to the format specifier. """ - # pylint: disable=too-many-branches + # pylint: disable=too-many-branches, too-many-return-statements + + def _twos_complement(val: int, bit_width: int): + """Compute the twos complement of an integer, assuming big endian + + Parameters + ---------- + val : int + Raw integer value (representation of bits), assuming MSB first. + bit_width : int + Number of bits + + Returns + ------- + : int + Twos complement of `val` + """ + if (val & (1 << (bit_width - 1))) != 0: # if sign bit is set e.g., 8bit: 128-255 + return val - (1 << bit_width) # compute negative value + return val # return positive value as is + name, n_things = format_string.split(":") if name == "bytes": nbits = int(n_things) * 8 @@ -984,13 +1004,33 @@ def read(self, format_string: str, update_position: bool = True) -> Union[int, f if update_position: self.pos += nbits - if name == "uint": + # TODO: This logic is messy. 
When we tackle getting away from the bitstring format strings, + # refactor this to be more concise + if name in ("uint", "uintbe"): return bytes_as_int - if name == "int": + if name == "uintle": + # Convert little-endian (LSB first) int to bigendian. Just reverses the order of the bytes. + return int.from_bytes( + bytes_as_int.to_bytes( + length=(nbits // 8) + 1 if nbits % 8 else (nbits // 8), + byteorder="little" + ), + byteorder="big" + ) + if name in ("int", "intbe"): # Compute two's complement for signed integer of any size (nbits) - if (bytes_as_int & (1 << (nbits - 1))) != 0: # if sign bit is set e.g., 8bit: 128-255 - return bytes_as_int - (1 << nbits) # compute negative value - return bytes_as_int # return positive value as is + return _twos_complement(bytes_as_int, nbits) + if name == "intle": + # Convert little-endian (LSB first) int to bigendian. Just reverses the order of the bytes. + bigendian_val = int.from_bytes( + bytes_as_int.to_bytes( + length=(nbits // 8) + 1 if nbits % 8 else (nbits // 8), + byteorder="little" + ), + byteorder="big" + ) + # Calculate twos complement + return _twos_complement(bigendian_val, nbits) if name == "floatbe": if nbits == 16: name = "!e" @@ -999,8 +1039,18 @@ def read(self, format_string: str, update_position: bool = True) -> Union[int, f elif nbits == 64: name = "!d" else: - raise ValueError(f"Unsupported float size {nbits}, only 32 and 64 are supported") + raise ValueError(f"Unsupported float size {nbits}, only 16, 32 and 64 are supported") return struct.unpack(name, int.to_bytes(bytes_as_int, nbits // 8, byteorder="big"))[0] + if name == "floatle": + if nbits == 16: + name = "e" + elif nbits == 32: + name = "f" + elif nbits == 64: + name = "d" + else: + raise ValueError(f"Unsupported float size {nbits}, only 16, 32 and 64 are supported") + return struct.unpack(name, int.to_bytes(bytes_as_int, nbits // 8, byteorder="little"))[0] if name == "bin": # Binary string return f"{bytes_as_int:0{nbits}b}" @@ -1184,14 +1234,15 
@@ class StringDataEncoding(DataEncoding): 'UTF-16LE', 'UTF-16BE', 'UTF-32', 'UTF-32LE', 'UTF-32BE') def __init__(self, encoding: str = 'UTF-8', - byte_order: str = None, - termination_character: str = None, - fixed_length: int = None, - leading_length_size: int = None, - dynamic_length_reference: str = None, - use_calibrated_value: bool = True, - discrete_lookup_length: list = None, - length_linear_adjuster: callable = None): + byte_order: Optional[str] = None, + termination_character: Optional[str] = None, + fixed_length: Optional[int] = None, + leading_length_size: Optional[int] = None, + dynamic_length_reference: Optional[str] = None, + use_calibrated_value: Optional[bool] = True, + discrete_lookup_length: Optional[List[DiscreteLookup]] = None, + length_linear_adjuster: Optional[callable] = None): + # pylint: disable=pointless-statement f"""Constructor Only one of termination_character, fixed_length, or leading_length_size should be set. Setting more than one is nonsensical. @@ -1199,11 +1250,13 @@ def __init__(self, encoding: str = 'UTF-8', Parameters ---------- encoding : str - One of {self._supported_encodings}. Describes how to read the characters in the string. - byte_order : str + One of the XTCE-supported encodings: {self._supported_encodings} + Describes how to read the characters in the string. + Default is UTF-8. + byte_order : Optional[str] Description of the byte order, used for multi-byte character encodings where the endianness cannot be determined from the encoding specifier. Can be None if encoding is single-byte or UTF-*BE/UTF-*LE. - termination_character : str + termination_character : Optional[str] A single hexadecimal character, represented as a string. Must be encoded in the same encoding as the string itself. For example, for a utf-8 encoded string, the hex string must be two hex characters (one byte). 
For a UTF-16* encoded string, the hex representation of the termination character must be four characters @@ -1214,7 +1267,7 @@ def __init__(self, encoding: str = 'UTF-8', Fixed size in bits of a leading field that contains the length of the subsequent string. dynamic_length_reference : Optional[str] Name of referenced parameter for dynamic length, in bits. May be combined with a linear_adjuster - use_calibrated_value: bool + use_calibrated_value: Optional[bool] Whether to use the calibrated value on the referenced parameter in dynamic_length_reference. Default is True. discrete_lookup_length : Optional[List[DiscreteLookup]] @@ -1224,7 +1277,8 @@ def __init__(self, encoding: str = 'UTF-8', linear adjuster should multiply by 8 to give the size in bits. """ if encoding not in self._supported_encodings: - raise ValueError(f"Got encoding={encoding}. Encoding must be one of {self._supported_encodings}.") + raise ValueError(f"Got encoding={encoding} (uppercased). " + f"Encoding must be one of {self._supported_encodings}.") self.encoding = encoding # Check that the termination character is a single character in the specified encoding # e.g. b'\x58' in utf-8 is "X" @@ -1235,13 +1289,14 @@ def __init__(self, encoding: str = 'UTF-8', f"hex string representation of a single character, e.g. '58' for character 'X' in UTF-8 " f"or '5800' for character 'X' in UTF-16LE. 
Note that variable-width encoding is not " f"yet supported in any encoding.") - if encoding not in ['US-ASCII', 'ISO-8859-1', 'Windows-1252', 'UTF-8']: # for these, byte order doesn't matter + if encoding not in ['US-ASCII', 'ISO-8859-1', 'Windows-1252', 'UTF-8']: # for these, byte order doesn't matter if byte_order is None: - if encoding[-2:] in ("LE", "BE"): - self.byte_order = {"LE": "leastSignificantByteFirst", - "BE": "mostSignificantByteFirst"}[encoding[-2:]] + if "LE" in encoding: + self.byte_order = "leastSignificantByteFirst" + elif "BE" in encoding: + self.byte_order = "mostSignificantByteFirst" else: - raise ValueError(f"Byte order must be specified for multi-byte character encodings.") + raise ValueError("Byte order must be specified for multi-byte character encodings.") else: self.byte_order = byte_order self.termination_character = termination_character # Always in hex, per 4.3.2.2.5.5.4 of XTCE spec @@ -1370,7 +1425,7 @@ def parse_value(self, packet_data: PacketData, parsed_data: dict, **kwargs) -> T bitstring_format, skip_bits_after = self._get_format_string(packet_data, parsed_data) parsed_value = packet_data.read(bitstring_format) packet_data.pos += skip_bits_after # Allows skip over termination character - return parsed_value.decode(self.encoding.lower()), None + return parsed_value.decode(self.encoding), None @classmethod def from_data_encoding_xml_element(cls, element: ElementTree.Element, ns: dict) -> 'StringDataEncoding': @@ -1395,8 +1450,8 @@ def from_data_encoding_xml_element(cls, element: ElementTree.Element, ns: dict) """ encoding: str = element.get("encoding", "UTF-8") - byte_order = None # fallthrough value - if encoding not in ('US-ASCII', 'ISO-8859-1', 'Windows-1252', 'UTF-8'): # single-byte chars + byte_order = None # fallthrough value + if encoding not in ('US-ASCII', 'ISO-8859-1', 'Windows-1252', 'UTF-8'): # single-byte chars if not (encoding.endswith("BE") or encoding.endswith("LE")): byte_order = element.get("byteOrder") if 
byte_order is None: @@ -1451,8 +1506,9 @@ def from_data_encoding_xml_element(cls, element: ElementTree.Element, ns: dict) class NumericDataEncoding(DataEncoding, metaclass=ABCMeta): """Abstract class that is inherited by IntegerDataEncoding and FloatDataEncoding""" - def __init__(self, size_in_bits: int, encoding: str, - byte_order: str = "mostSignficantByteFirst", + def __init__(self, size_in_bits: int, + encoding: str, + byte_order: str = "mostSignificantByteFirst", default_calibrator: Optional[Calibrator] = None, context_calibrators: Optional[List[ContextCalibrator]] = None): """Constructor @@ -1466,6 +1522,8 @@ def __init__(self, size_in_bits: int, encoding: str, though 'signed' is not actually a valid specifier according to XTCE. 'twosCompliment' [sic] should be used instead, though we support the unofficial 'signed' specifier here. For supported specifiers, see XTCE spec 4.3.2.2.5.6.2 + byte_order : str + Description of the byte order. Default is 'mostSignificantByteFirst' (big-endian). default_calibrator : Optional[Calibrator] Optional Calibrator object, containing information on how to transform the integer-encoded data, e.g. via a polynomial conversion or spline interpolation. @@ -1535,13 +1593,13 @@ def _get_format_string(self, packet_data: PacketData, parsed_data: dict) -> str: else: raise NotImplementedError(f"Unrecognized encoding {self.encoding}. 
" f"Only signed and unsigned have been implemented.") - if self.size_in_bits % 8: # if not a whole-byte value, disregard byte order - endianness = "" + + if self.byte_order == 'mostSignificantByteFirst': + endianness = "be" + elif self.byte_order == "leastSignificantByteFirst": + endianness = "le" else: - endianness = {"leastSignificantByteFirst": "le", - "mostSignificantByteFirst": "be"}.get(self.byte_order) - if endianness is None: - raise NotImplementedError(f"Unrecognized byte order {self.byte_order}.") + raise NotImplementedError(f"Unrecognized byte order {self.byte_order}.") return f"{base}{endianness}:{self.size_in_bits}" @classmethod @@ -1560,7 +1618,7 @@ def from_data_encoding_xml_element(cls, element: ElementTree.Element, ns: dict) : cls """ size_in_bits = int(element.attrib['sizeInBits']) - encoding = element.attrib['encoding'] + encoding = element.attrib['encoding'] if 'encoding' in element.attrib else "unsigned" byte_order = element.get("byteOrder", "mostSignificantByteFirst") calibrator = cls.get_default_calibrator(element, ns) context_calibrators = cls.get_context_calibrators(element, ns) @@ -1573,6 +1631,7 @@ class FloatDataEncoding(NumericDataEncoding): _supported_encodings = ['IEEE-754', 'MIL-1750A'] def __init__(self, size_in_bits: int, encoding: str = 'IEEE-754', + byte_order: str = 'mostSignificantByteFirst', default_calibrator: Optional[Calibrator] = None, context_calibrators: Optional[List[ContextCalibrator]] = None): """Constructor @@ -1585,6 +1644,8 @@ def __init__(self, size_in_bits: int, encoding: str = 'IEEE-754', Size of the encoded value, in bits. encoding : str Encoding method of the float data. Must be either 'IEEE-754' or 'MIL-1750A'. Defaults to IEEE-754. + byte_order : str + Description of the byte order. Default is 'mostSignificantByteFirst' (big endian). default_calibrator : Optional[Calibrator] Optional Calibrator object, containing information on how to transform the data, e.g. 
via a polynomial conversion or spline interpolation. @@ -1611,7 +1672,7 @@ def _get_format_string(self, packet_data: PacketData, parsed_data: dict) -> str: str Format string in the bitstring format. e.g. uint:16 """ - if self.size_in_bits % 8: # disregard byte order for sub-byte fields + if self.size_in_bits % 8: # disregard byte order for sub-byte fields endianness = "" else: endianness = {"leastSignificantByteFirst": "le", diff --git a/tests/unit/test_xtcedef.py b/tests/unit/test_xtcedef.py index fd6db0c..f0403b1 100644 --- a/tests/unit/test_xtcedef.py +++ b/tests/unit/test_xtcedef.py @@ -39,7 +39,7 @@ def test_invalid_parameter_type_error(test_data_dir): """ - x = io.TextIOWrapper(io.BytesIO(test_xtce_document.encode("utf-8"))) + x = io.TextIOWrapper(io.BytesIO(test_xtce_document.encode("UTF-8"))) with pytest.raises(xtcedef.InvalidParameterTypeError): xtcedef.XtcePacketDefinition(x) @@ -80,7 +80,7 @@ def test_unsupported_parameter_type_error(test_data_dir): """ - x = io.TextIOWrapper(io.BytesIO(test_xtce_document.encode("utf-8"))) + x = io.TextIOWrapper(io.BytesIO(test_xtce_document.encode("UTF-8"))) with pytest.raises(NotImplementedError): xtcedef.XtcePacketDefinition(x) @@ -747,13 +747,13 @@ def test_polynomial_calibrator_calibrate(xq, expectation): ('xml_string', 'expectation'), [ (""" - + 0058 """, - xtcedef.StringDataEncoding(termination_character='0058', encoding='utf-16-be')), + xtcedef.StringDataEncoding(termination_character='0058', encoding='UTF-16BE')), (""" @@ -1165,7 +1165,7 @@ def test_string_parameter_type(xml_string: str, expectation): # Termination character tests (xtcedef.StringParameterType( 'TEST_STRING', - xtcedef.StringDataEncoding(encoding='utf-8', + xtcedef.StringDataEncoding(encoding='UTF-8', termination_character='58')), {}, # Don't need parsed_data for termination character # 123X456 + extra characters, termination character is X @@ -1173,7 +1173,7 @@ def test_string_parameter_type(xml_string: str, expectation): '123'), 
(xtcedef.StringParameterType( 'TEST_STRING', - xtcedef.StringDataEncoding(encoding='utf-8', + xtcedef.StringDataEncoding(encoding='UTF-8', termination_character='58')), {}, # Don't need parsed_data for termination character # 56bits + 123X456 + extra characters, termination character is X @@ -1181,7 +1181,7 @@ def test_string_parameter_type(xml_string: str, expectation): '123'), (xtcedef.StringParameterType( 'TEST_STRING', - xtcedef.StringDataEncoding(encoding='utf-8', + xtcedef.StringDataEncoding(encoding='UTF-8', termination_character='58')), {}, # Don't need parsed_data for termination character # 53bits + 123X456 + extra characters, termination character is X @@ -1190,32 +1190,32 @@ def test_string_parameter_type(xml_string: str, expectation): '123'), (xtcedef.StringParameterType( "TEST_STRING", - xtcedef.StringDataEncoding(encoding="utf-8", + xtcedef.StringDataEncoding(encoding="UTF-8", termination_character='00')), {}, - xtcedef.PacketData("false_is_truthy".encode("utf-8") + b'\x00ABCD'), + xtcedef.PacketData("false_is_truthy".encode("UTF-8") + b'\x00ABCD'), 'false_is_truthy'), (xtcedef.StringParameterType( "TEST_STRING", - xtcedef.StringDataEncoding(encoding="utf-16-be", + xtcedef.StringDataEncoding(encoding="UTF-16BE", termination_character='0021')), {}, - xtcedef.PacketData("false_is_truthy".encode("utf-16-be") + b'\x00\x21ignoreme'), + xtcedef.PacketData("false_is_truthy".encode("UTF-16BE") + b'\x00\x21ignoreme'), 'false_is_truthy'), (xtcedef.StringParameterType( 'TEST_STRING', - xtcedef.StringDataEncoding(encoding='utf-16-le', + xtcedef.StringDataEncoding(encoding='UTF-16LE', termination_character='5800')), {}, # Don't need parsed_data for termination character # 123X456, termination character is X - xtcedef.PacketData('123X456'.encode('utf-16-le')), + xtcedef.PacketData('123X456'.encode('UTF-16LE')), '123'), (xtcedef.StringParameterType( 'TEST_STRING', - xtcedef.StringDataEncoding(encoding='utf-16-be', + xtcedef.StringDataEncoding(encoding='UTF-16BE', 
termination_character='0058')), {}, # Don't need parsed_data for termination character - xtcedef.PacketData('123X456'.encode('utf-16-be')), + xtcedef.PacketData('123X456'.encode('UTF-16BE')), '123'), # Leading length test (xtcedef.StringParameterType( @@ -1321,11 +1321,25 @@ def test_integer_parameter_type(xml_string: str, expectation): {}, xtcedef.PacketData(0b1000000000000000.to_bytes(length=2, byteorder='big')), 32768), + # 16-bit unsigned little endian at byte boundary + (xtcedef.IntegerParameterType( + 'TEST_INT', + xtcedef.IntegerDataEncoding(16, 'unsigned', byte_order="leastSignificantByteFirst")), + {}, + xtcedef.PacketData(0b1000000000000000.to_bytes(length=2, byteorder='big')), + 128), # 16-bit signed starting at byte boundary (xtcedef.IntegerParameterType('TEST_INT', xtcedef.IntegerDataEncoding(16, 'signed')), {}, xtcedef.PacketData(0b1111111111010110.to_bytes(length=2, byteorder='big')), -42), + # 16-bit signed little endian starting at byte boundary + (xtcedef.IntegerParameterType( + 'TEST_INT', + xtcedef.IntegerDataEncoding(16, 'signed', byte_order="leastSignificantByteFirst")), + {}, + xtcedef.PacketData(0b1101011011111111.to_bytes(length=2, byteorder='big')), + -42), # 16-bit signed integer starting at a byte boundary, # calibrated by a polynomial y = (x*2 + 5); x = -42; y = -84 + 5 = -79 (xtcedef.IntegerParameterType( @@ -1379,6 +1393,16 @@ def test_integer_parameter_type(xml_string: str, expectation): # |---int:12---| xtcedef.PacketData(0b11111110000000000011111110101010.to_bytes(length=4, byteorder='big'), pos=6), -2048), + # 12-bit signed little endian integer starting on bit 6 of the first byte + (xtcedef.IntegerParameterType( + 'TEST_INT', + xtcedef.IntegerDataEncoding(12, 'signed', byte_order='leastSignificantByteFirst')), + {}, + # 12-bit signed little endian integer starting on bit 6 of the first byte. 
The LSB of the integer comes first + # 11111100 00000010 00111111 10101010 + # |---int:12---| + xtcedef.PacketData(0b11111100000000100011111110101010.to_bytes(length=4, byteorder='big'), pos=6), + -2048), (xtcedef.IntegerParameterType('TEST_INT', xtcedef.IntegerDataEncoding(3, 'twosComplement')), {}, # 3-bit signed integer starting at bit 7 of the first byte @@ -1489,14 +1513,24 @@ def test_float_parameter_type(xml_string: str, expectation): @pytest.mark.parametrize( ('parameter_type', 'parsed_data', 'packet_data', 'expected'), [ + # Test big endian 32-bit IEEE float (xtcedef.FloatParameterType('TEST_FLOAT', xtcedef.FloatDataEncoding(32)), {}, xtcedef.PacketData(0b01000000010010010000111111010000.to_bytes(length=4, byteorder='big')), 3.14159), + # Test little endian 32-bit IEEE float + (xtcedef.FloatParameterType( + 'TEST_FLOAT', + xtcedef.FloatDataEncoding(32, byte_order='leastSignificantByteFirst')), + {}, + xtcedef.PacketData(0b01000000010010010000111111010000.to_bytes(length=4, byteorder='big')), + 3.14159), + # Test big endian 64-bit float (xtcedef.FloatParameterType('TEST_FLOAT', xtcedef.FloatDataEncoding(64)), {}, xtcedef.PacketData(b'\x3F\xF9\xE3\x77\x9B\x97\xF4\xA8'), # 64-bit IEEE 754 value of Phi 1.61803), + # Test float parameter type encoded as big endian 16-bit integer with contextual polynomial calibrator (xtcedef.FloatParameterType( 'TEST_FLOAT', xtcedef.IntegerDataEncoding( @@ -1731,7 +1765,7 @@ def test_binary_parameter_parsing(parameter_type, parsed_data, packet_data, expe smoot - + 00 @@ -1765,7 +1799,7 @@ def test_boolean_parameter_type(xml_string, expectation): '0', True), (xtcedef.BooleanParameterType( 'TEST_BOOL', - xtcedef.StringDataEncoding(encoding="utf-8", termination_character='00')), + xtcedef.StringDataEncoding(encoding="UTF-8", termination_character='00')), {}, 
xtcedef.PacketData(0b011001100110000101101100011100110110010101011111011010010111001101011111011101000111001001110101011101000110100001111001000000000010101101010111.to_bytes(length=18, byteorder='big')), 'false_is_truthy', True),