From 755df22797f3da24e7c070592aa3683fdfd325e5 Mon Sep 17 00:00:00 2001 From: Mikhail Iurkov Date: Wed, 19 Jan 2022 18:33:55 +0300 Subject: [PATCH] Tag parse refactor --- piexif/_exif.py | 22 +++++++ piexif/_load.py | 148 +++++++++++++++--------------------------------- tests/s_test.py | 5 -- 3 files changed, 69 insertions(+), 106 deletions(-) diff --git a/piexif/_exif.py b/piexif/_exif.py index c68d5ab..a34cb86 100644 --- a/piexif/_exif.py +++ b/piexif/_exif.py @@ -1,3 +1,6 @@ +from struct import calcsize + + class TYPES: Byte = 1 Ascii = 2 @@ -13,6 +16,25 @@ class TYPES: DFloat = 12 +TYPE_FORMAT = { + TYPES.Byte: 'B', + TYPES.Ascii: None, + TYPES.Short: 'H', + TYPES.Long: 'L', + TYPES.Rational: 'LL', + TYPES.SByte: 'b', + TYPES.Undefined: None, + TYPES.SShort: 'h', + TYPES.SLong: 'l', + TYPES.SRational: 'll', + TYPES.Float: 'f', + TYPES.DFloat: 'd', +} + + +TYPE_LENGTH = {t: calcsize('=' + f) for t, f in TYPE_FORMAT.items() if f} + + SIMPLE_NUMERICS = [ TYPES.Byte, TYPES.Short, diff --git a/piexif/_load.py b/piexif/_load.py index aa820c4..8fc1e1d 100644 --- a/piexif/_load.py +++ b/piexif/_load.py @@ -111,6 +111,42 @@ def get_ifd_dict(self, pointer, ifd_name, read_unknown=False): result = {} return result + def _read_tag(self, pointer): + tag, value_type, value_num = unpack_from( + self.endian_mark + "HHL", self.tiftag, pointer + ) + # Treat unknown types as `Undefined` + value_length = TYPE_LENGTH.get(value_type, 1) + value_length_total = value_length * value_num + if value_length_total > 4: + data_pointer = unpack_from( + self.endian_mark + "L", self.tiftag, pointer + 8 + )[0] + else: + data_pointer = pointer + 8 + + format = TYPE_FORMAT.get(value_type, None) + + if format is None: + # Ascii, Undefined and unknown types + if value_type == TYPES.Ascii: + # Crop ending zero + value_length_total = max(0, value_length_total - 1) + raw_value = self.tiftag[data_pointer:data_pointer+value_length_total] + values = (raw_value, ) + else: + # Unpacked types + values = unpack_from( + self.endian_mark + format * value_num, self.tiftag, data_pointer + ) + # Collate rationals + if len(format) > 1: + stride = len(format) + values = tuple( + values[i*stride:(i+1)*stride] for i in range(value_num) + ) + return tag, value_type, tuple(values) + def _do_get_ifd_dict(self, pointer, ifd_name, read_unknown=False): ifd_dict = {} tag_count = unpack_from(self.endian_mark + "H", @@ -122,24 +158,24 @@ def _do_get_ifd_dict(self, pointer, ifd_name, read_unknown=False): t = ifd_name for x in range(tag_count): pointer = offset + 12 * x - tag, value_type, value_num = unpack_from( - self.endian_mark + "HHL", self.tiftag, pointer) - value = self.tiftag[pointer+8: pointer+12] - v_set = (value_type, value_num, value, tag) + tag, value_type, values = self._read_tag(pointer) if tag in TAGS[t]: - converted = self.convert_value(v_set) expected_value_type = TAGS[t][tag]['type'] if value_type != expected_value_type: try: - converted = coerce(converted, value_type, expected_value_type) + values = coerce(values, value_type, expected_value_type) except ValueError: # Skip if coercion failed continue - if isinstance(converted, tuple) and (len(converted) == 1): - converted = converted[0] - ifd_dict[tag] = converted + if len(values) == 1: + values = values[0] + ifd_dict[tag] = values elif read_unknown: - ifd_dict[tag] = (v_set[0], v_set[1], v_set[2], self.tiftag) + value_num, = unpack_from( + self.endian_mark + "L", self.tiftag, pointer + 4 + ) + pointer_or_value = self.tiftag[pointer + 8: pointer + 12] + ifd_dict[tag] = value_type, value_num, pointer_or_value, self.tiftag else: pass @@ -148,96 +184,6 @@ def _do_get_ifd_dict(self, pointer, ifd_name, read_unknown=False): ifd_dict["first_ifd_pointer"] = self.tiftag[pointer:pointer + 4] return ifd_dict - def convert_value(self, val): - data = None - t = val[0] - length = val[1] - value = val[2] - - if t == TYPES.Byte: # BYTE - if length > 4: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from("B" * length, self.tiftag, pointer) - else: - data = unpack_from("B" * length, value) - elif t == TYPES.Ascii: # ASCII - if length > 4: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = self.tiftag[pointer: pointer+length - 1] - else: - data = value[0: length - 1] - elif t == TYPES.Short: # SHORT - if length > 2: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from(self.endian_mark + "H" * length, - self.tiftag, pointer) - else: - data = unpack_from(self.endian_mark + "H" * length, value) - elif t == TYPES.Long: # LONG - if length > 1: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from(self.endian_mark + "L" * length, - self.tiftag, pointer) - else: - data = unpack_from(self.endian_mark + "L" * length, value) - elif t == TYPES.Rational: # RATIONAL - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = tuple( - unpack_from(self.endian_mark + "LL", - self.tiftag, pointer + x * 8) - for x in range(length) - ) - elif t == TYPES.SByte: # SIGNED BYTES - if length > 4: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from("b" * length, self.tiftag, pointer) - else: - data = unpack_from("b" * length, value) - elif t == TYPES.Undefined: # UNDEFINED BYTES - if length > 4: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = self.tiftag[pointer: pointer+length] - else: - data = value[0: length] - elif t == TYPES.SShort: # SIGNED SHORT - if length > 2: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from(self.endian_mark + "h" * length, - self.tiftag, pointer) - else: - data = unpack_from(self.endian_mark + "h" * length, value) - elif t == TYPES.SLong: # SLONG - if length > 1: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from(self.endian_mark + "l" * length, - self.tiftag, pointer) - else: - data = unpack_from(self.endian_mark + "l" * length, value) - elif t == TYPES.SRational: # SRATIONAL - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = tuple( - unpack_from(self.endian_mark + "ll", - self.tiftag, pointer + x * 8) - for x in range(length) - ) - elif t == TYPES.Float: # FLOAT - if length > 1: - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from(self.endian_mark + "f" * length, - self.tiftag, pointer) - else: - data = unpack_from(self.endian_mark + "f" * length, value) - elif t == TYPES.DFloat: # DOUBLE - pointer = unpack_from(self.endian_mark + "L", value)[0] - data = unpack_from(self.endian_mark + "d" * length, - self.tiftag, pointer) - else: - raise ValueError("Exif might be wrong. Got incorrect value " + - "type to decode.\n" + - "tag: " + str(val[3]) + "\ntype: " + str(t)) - - return data - def _get_key_name_dict(exif_dict): new_dict = { @@ -254,7 +200,7 @@ def coerce(value, type, target): if target == TYPES.Undefined: if type == TYPES.Byte: # Interpret numbers as byte values, to fit Pillow behaviour - return b''.join(min(x, 255).to_bytes(1, 'big') for x in value) + return ( b''.join(min(x, 255).to_bytes(1, 'big') for x in value), ) elif target in SIMPLE_NUMERICS: if type in SIMPLE_NUMERICS: return value diff --git a/tests/s_test.py b/tests/s_test.py index 042df90..1003f57 100644 --- a/tests/s_test.py +++ b/tests/s_test.py @@ -657,11 +657,6 @@ def test_ExifReader_return_unknown(self): self.assertEqual(ifd[65535][1], 0) self.assertEqual(ifd[65535][2], b"\x00\x00") - def test_ExifReader_convert_value_fail(self): - er = piexif._load._ExifReader(I1) - with self.assertRaises(ValueError): - er.convert_value((None, None, None, None)) - def test_split_into_segments_fail1(self): with self.assertRaises(InvalidImageDataError): _common.split_into_segments(b"I'm not JPEG")