Skip to content

Commit

Permalink
Migrating to imageio (#11)
Browse files Browse the repository at this point in the history
* fixing handling of monochrome images. Transitioning to imageio. Avoiding disk IO

* adding missing data files
  • Loading branch information
dgrechka authored Aug 19, 2022
1 parent 1141a94 commit 88833e8
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 49 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ authors = [
]
dependencies = [
'kafka-python == 2.0.2',
'scikit-image == 0.19.3'
'imageio == 2.21.1'
]

[tool.hatch.build.targets.wheel]
Expand Down
2 changes: 1 addition & 1 deletion src/kafkajobs/serialization/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .image import imagesFieldToNp, imagesNpToStrList
from .image import imagesFieldToNp, imagesNpToStrList, imageNpToB64SerializedStruct, imageB64SerializedStructToNp
from .np import base64strToNpArray, npArrayToBase64str
79 changes: 33 additions & 46 deletions src/kafkajobs/serialization/image.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,40 @@
from skimage import io
import tempfile
import os
import imageio.v3 as iio
import numpy as np
import base64
import shutil


def imageToRgbImage(image):
    """Return ``image`` as an array with exactly three colour channels.

    Args:
        image: numpy array holding either a rank-2 monochrome image or a
            rank-3 image whose last axis holds the channels.

    Returns:
        A rank-3 array with three channels on the last axis. Monochrome
        input (rank 2, or rank 3 with one or two channels) has its first
        channel replicated three times; for other input the first three
        channels are kept and any further channel (e.g. alpha) is dropped.
    """
    rank = len(image.shape)
    if rank == 2:
        # Monochrome without a channel axis: replicate into three channels.
        return np.stack((image, image, image), axis=2)
    if rank == 3 and image.shape[2] in (1, 2):
        # Monochrome with an explicit channel axis (optionally followed by
        # alpha). Slicing [:, :, :3] would leave fewer than three channels,
        # so replicate the first channel instead.
        luminance = image[:, :, 0]
        return np.stack((luminance, luminance, luminance), axis=2)
    # Coloured: keep the first three channels, discarding alpha if any.
    return image[:, :, :3]

def imageNpToB64SerializedStruct(npImage):
    """Serialize a numpy image into a base64 JPEG struct.

    The image is converted with ``imageToRgbImage`` and JPEG-encoded
    through imageio into an in-memory bytes object.

    Args:
        npImage: numpy array accepted by ``imageToRgbImage``.

    Returns:
        A dict with ``'type'`` set to ``"jpg"`` and ``'data'`` holding the
        base64 text of the encoded image, without line breaks.
    """
    rgb_pixels = imageToRgbImage(npImage)
    # The "<bytes>" target makes imwrite return the encoded file content.
    encoded_jpeg = iio.imwrite("<bytes>", rgb_pixels, extension=".jpeg")
    # b64encode emits no line breaks, so no newline stripping is needed.
    payload = base64.b64encode(encoded_jpeg).decode("utf-8")
    return {
        'type': "jpg",
        'data': payload,
    }

def imagesNpToStrList(npImages):
    """Obsolete: left for backward compatibility.

    Serializes every image in ``npImages`` with
    ``imageNpToB64SerializedStruct`` entirely in memory (no temporary
    files), so monochrome inputs are handled the same way as there.

    Args:
        npImages: iterable of numpy image arrays.

    Returns:
        A list with one serialized struct per input image, in input order.
    """
    return [imageNpToB64SerializedStruct(x) for x in npImages]

def imageB64SerializedStructToNp(b64SerializedStruct):
    """Decode a base64 serialized image struct into an RGB numpy array.

    Args:
        b64SerializedStruct: mapping with a ``'type'`` entry (used as the
            file extension hint for imageio) and a ``'data'`` entry holding
            the base64 text of the encoded image.

    Returns:
        The decoded image passed through ``imageToRgbImage``.
    """
    type_hint = b64SerializedStruct['type']
    encoded_text: str = b64SerializedStruct['data']
    raw_bytes = base64.decodebytes(encoded_text.encode("utf-8"))
    decoded = iio.imread(raw_bytes, extension=f".{type_hint}")
    # Guard against payloads from old encoder versions, which presumably
    # may not decode to three channels.
    return imageToRgbImage(decoded)

def imagesFieldToNp(images):
    """Obsolete: left for backward compatibility.

    Decodes every serialized struct in ``images`` with
    ``imageB64SerializedStructToNp`` entirely in memory (no temporary
    files). A struct that cannot be decoded raises instead of being
    skipped, so the result always lines up with the input.

    Args:
        images: iterable of serialized image structs.

    Returns:
        A list with one decoded numpy array per input struct, in input order.
    """
    return [imageB64SerializedStructToNp(x) for x in images]
Binary file added tests/data/343911.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/data/monochrome.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
26 changes: 25 additions & 1 deletion tests/test_serialization.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,38 @@
import unittest
import base64


import kafkajobs.serialization as serialization

import imageio.v2 as imageio
import numpy as np

class TestSerialization(unittest.TestCase):
    """Tests for the image helpers exposed by ``kafkajobs.serialization``."""

    def test_image_round_trip(self):
        """Serializing and then deserializing an image keeps its shape."""
        nparr = imageio.imread('tests/data/343911.jpg')
        shape = nparr.shape

        roundTripped = serialization.imageB64SerializedStructToNp(
            serialization.imageNpToB64SerializedStruct(nparr))

        # assertEqual (unlike a bare assert) is not stripped under -O.
        self.assertEqual(
            roundTripped.shape, shape,
            f'shape {roundTripped.shape} is not {shape}')

    def test_issue8(self):
        """The issue_8.png sample serializes into a single jpg struct."""
        nparr = imageio.imread('tests/data/issue_8.png')
        serialized = serialization.imagesNpToStrList([nparr])

        self.assertEqual(len(serialized), 1)
        self.assertEqual(serialized[0]['type'], "jpg")

    def test_deserializing_monochrome(self):
        """A monochrome payload is deserialized with three colour channels."""
        # Manual encoding, bypassing the library serializer.
        with open('tests/data/monochrome.png', 'rb') as photoFile:
            photo = photoFile.read()

        # NOTE(review): PNG bytes are labelled "jpg" here; presumably
        # deliberate, confirm.
        image = {
            'type': "jpg",
            'data': base64.encodebytes(photo).decode("utf-8").replace("\n", "")
        }

        # The deserializer is expected to create 3 colour channels.
        deserialized = serialization.imageB64SerializedStructToNp(image)
        self.assertEqual(
            deserialized.shape, (400, 400, 3),
            f'shape {deserialized.shape}')

# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit 88833e8

Please sign in to comment.