Skip to content

Commit

Permalink
Merge pull request #84 from CybercentreCanada/AL-2766
Browse files (browse the repository at this point in the history)
apply formatting and silence eml conversions
  • Loading branch information
cccs-rs authored Nov 9, 2023
2 parents 240e120 + 018c1f7 commit 18cf4ab
Showing 1 changed file with 50 additions and 43 deletions.
93 changes: 50 additions & 43 deletions document_preview/helper/emlrender.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -28,6 +28,7 @@

try:
from PIL import Image

Image.MAX_IMAGE_PIXELS = 2147483647
except:
print('[ERROR] pillow module not installed ("pip install pillow")')
Expand All @@ -40,8 +41,8 @@
__email__ = "[email protected]"
__name__ = "EMLRender"

textTypes = ['text/plain', 'text/html']
imageTypes = ['image/gif', 'image/jpeg', 'image/png']
textTypes = ["text/plain", "text/html"]
imageTypes = ["image/gif", "image/jpeg", "image/png"]


def appendImages(images):
Expand All @@ -50,7 +51,7 @@ def appendImages(images):

new_width = max(widths)
new_height = sum(heights)
new_im = Image.new('RGB', (new_width, new_height), color=bgColor)
new_im = Image.new("RGB", (new_width, new_height), color=bgColor)
offset = 0
for im in images:
# x = int((new_width - im.size[0])/2)
Expand All @@ -61,58 +62,58 @@ def appendImages(images):


def processEml(data, output_dir, logger, load_ext_images=False, load_images=False):
'''
"""
Process the email (bytes), extract MIME parts and useful headers.
Generate a PNG picture of the mail
'''
"""
msg = email.message_from_bytes(data)
try:
decode = email.header.decode_header(msg['Date'])[0]
decode = email.header.decode_header(msg["Date"])[0]
dateField = str(decode[0])
except:
dateField = '<Unknown>'
logger.info('Date: %s' % dateField)
dateField = "<Unknown>"
logger.info("Date: %s" % dateField)

try:
decode = email.header.decode_header(msg['From'])[0]
decode = email.header.decode_header(msg["From"])[0]
fromField = str(decode[0])
except:
fromField = '<Unknown>'
logger.info('From: %s' % fromField)
fromField = fromField.replace('<', '&lt;').replace('>', '&gt;')
fromField = "&lt;Unknown&gt;"
logger.info("From: %s" % fromField)
fromField = fromField.replace("<", "&lt;").replace(">", "&gt;")

try:
decode = email.header.decode_header(msg['To'])[0]
decode = email.header.decode_header(msg["To"])[0]
toField = str(decode[0])
except:
toField = '&lt;Unknown&gt;'
logger.info('To: %s' % toField)
toField = toField.replace('<', '&lt;').replace('>', '&gt;')
toField = "&lt;Unknown&gt;"
logger.info("To: %s" % toField)
toField = toField.replace("<", "&lt;").replace(">", "&gt;")

try:
decode = email.header.decode_header(msg['Subject'])[0]
decode = email.header.decode_header(msg["Subject"])[0]
subjectField = str(decode[0])
except:
subjectField = '&lt;Unknown&gt;'
logger.info('Subject: %s' % subjectField)
subjectField = subjectField.replace('<', '&lt;').replace('>', '&gt;')
subjectField = "&lt;Unknown&gt;"
logger.info("Subject: %s" % subjectField)
subjectField = subjectField.replace("<", "&lt;").replace(">", "&gt;")

try:
decode = email.header.decode_header(msg['Message-Id'])[0]
decode = email.header.decode_header(msg["Message-Id"])[0]
idField = str(decode[0])
except:
idField = '&lt;Unknown&gt;'
logger.info('Message-Id: %s' % idField)
idField = idField.replace('<', '&lt;').replace('>', '&gt;')
idField = "&lt;Unknown&gt;"
logger.info("Message-Id: %s" % idField)
idField = idField.replace("<", "&lt;").replace(">", "&gt;")

imgkitOptions = {'load-error-handling': 'skip'}
imgkitOptions = {"load-error-handling": "skip", "quiet": None}
if not load_ext_images:
imgkitOptions.update({'no-images': None, 'disable-javascript': None})
imgkitOptions.update({"no-images": None, "disable-javascript": None})
# imgkitOptions.update({ 'quiet': None })
imagesList = []

# Build a first image with basic mail details
headers = '''
headers = """
<table width="100%%">
<tr><td align="right"><b>Date:</b></td><td>%s</td></tr>
<tr><td align="right"><b>From:</b></td><td>%s</td></tr>
Expand All @@ -121,33 +122,39 @@ def processEml(data, output_dir, logger, load_ext_images=False, load_images=Fals
<tr><td align="right"><b>Message-Id:</b></td><td>%s</td></tr>
</table>
<hr></p>
''' % (dateField, fromField, toField, subjectField, idField)
""" % (
dateField,
fromField,
toField,
subjectField,
idField,
)
try:
header_path = NamedTemporaryFile(suffix=".png").name
imgkit.from_string(headers, header_path, options=imgkitOptions)
logger.info('Created headers %s' % header_path)
logger.info("Created headers %s" % header_path)
imagesList.append(header_path)
except Exception as e:
logger.warning(f'Creation of headers failed: {e}')
logger.warning(f"Creation of headers failed: {e}")

#
# Main loop - process the MIME parts
#
for part in msg.walk():
mimeType = part.get_content_type()
if part.is_multipart():
logger.info('Multipart found, continue')
logger.info("Multipart found, continue")
continue

logger.info('Found MIME part: %s' % mimeType)
logger.info("Found MIME part: %s" % mimeType)
if mimeType in textTypes:
try:
# Fix formatting
payload = part.get_payload(decode=True)
payload = regex.sub(rb"(\r\n){1,}", b"\r\n", payload)
payload = payload.replace(b"\r\n", b'<br>')
payload = regex.sub(rb"(<br> ){2,}", b'<br><br>', payload)
payload = quopri.decodestring(payload).decode('utf-8', errors="ignore")
payload = payload.replace(b"\r\n", b"<br>")
payload = regex.sub(rb"(<br> ){2,}", b"<br><br>", payload)
payload = quopri.decodestring(payload).decode("utf-8", errors="ignore")
except Exception as e:
payload = str(quopri.decodestring(part.get_payload(decode=True)))[2:-1]

Expand All @@ -159,31 +166,31 @@ def processEml(data, output_dir, logger, load_ext_images=False, load_images=Fals
try:
payload_path = NamedTemporaryFile(suffix=".png").name
imgkit.from_string(payload, payload_path, options=imgkitOptions)
logger.info('Decoded %s' % payload_path)
logger.info("Decoded %s" % payload_path)
imagesList.append(payload_path)
except Exception as e:
logger.warning(f'Decoding this MIME part returned error: {e}')
logger.warning(f"Decoding this MIME part returned error: {e}")

elif mimeType in imageTypes and load_images:
payload = part.get_payload(decode=False)
payload_path = NamedTemporaryFile(suffix=".png").name
imgdata = base64.b64decode(payload)
try:
with open(payload_path, 'wb') as f:
with open(payload_path, "wb") as f:
f.write(imgdata)
logger.info('Decoded %s' % payload_path)
logger.info("Decoded %s" % payload_path)
imagesList.append(payload_path)
except Exception as e:
logger.warning(f'Decoding this MIME part returned error: {e}')
logger.warning(f"Decoding this MIME part returned error: {e}")

resultImage = os.path.join(output_dir, 'output.png')
resultImage = os.path.join(output_dir, "output.png")
if len(imagesList) > 0:
images = list(map(Image.open, imagesList))
combo = appendImages(images)
combo.save(resultImage)
# Clean up temporary images
for i in imagesList:
os.remove(i)
return(resultImage)
return resultImage
else:
return(False)
return False

0 comments on commit 18cf4ab

Please sign in to comment.