Skip to content

Commit

Permalink
core image documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
maeriil committed Oct 13, 2023
1 parent a68286e commit df97fb7
Showing 1 changed file with 110 additions and 236 deletions.
346 changes: 110 additions & 236 deletions backend/modules/coreimage.py
Original file line number Diff line number Diff line change
@@ -1,287 +1,161 @@
import utilities.helpers.imageHelpers as imgutil
import utilities.helpers.stringHelpers as strutil
"""
This module, coreimage, contains multiple image processing functions which
can be used to process on the images. All the images are assumed to be of
cv2 image format.
"""

import cv2
import numpy as np
import textwrap
import math

from PIL import Image, ImageFont, ImageDraw


# TODO: 7.5 is the number that currently works best for our use cases, but
# this magic number needs to be replaced by a more principled calculation
def calc_max_gap_dist(image: np.array) -> int:
    """
    This is the maximum distance (in pixels) to merge two bounding boxes

    The gap scales linearly with the image height: 7.5 pixels is the
    maximum gap distance for an image with 1000 pixels height.

    Parameters
    ----------
    image : np.array
        cv2 image to calculate gap distance. Only its height (first axis)
        is read, so both colour and grayscale images are accepted

    Returns
    -------
    int
        Maximum gap distance in integer rounded down
    """

    # Index the shape instead of unpacking three values so that a 2-D
    # (grayscale) image does not raise
    height = image.shape[0]
    return math.floor((7.5 * height) / 1000)


# TODO: Support being able to pass the kernel size. Currently the size
# is already hardcoded in the code
def sharpen_image(image: np.array) -> np.array:
    """
    Convert the image to a denoised, inverted black and white image

    The steps are, in order: grayscale conversion, a 9x9 gaussian blur, an
    inverted binary threshold using Otsu's method and a single
    morphological opening with a 3x3 rectangular kernel.

    Parameters
    ----------
    image : np.array
        The cv2 image to process

    Returns
    -------
    np.array
        The processed cv2 image
    """

    # A 2-D image is already single channel; BGR2GRAY would reject it
    if image.ndim == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    image = cv2.GaussianBlur(image, (9, 9), 0)

    # threshold() returns (threshold value, image); only the image is kept
    image = cv2.threshold(
        image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )[1]

    # Attempt to remove noise from the image
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel, iterations=1)

    return image


def temp_test(im):
    """
    Apply an Otsu threshold to each of the three channels of the image

    A copy of the image is closed and opened with a 1x1 rectangular kernel,
    split into three channels along the last axis, each inverted channel is
    thresholded with Otsu's method, and the channels are merged back.

    Parameters
    ----------
    im : np.array
        The cv2 image to process; it is split into 3 parts along axis 2
        (presumably a 3 channel colour image - confirm against callers)

    Returns
    -------
    np.array
        The thresholded image with the channels concatenated on axis 2
    """
    # Work on a copy so the caller's image is left untouched
    morph = im.copy()

    # NOTE(review): with a 1x1 kernel these two passes presumably leave the
    # pixels unchanged - confirm whether a larger kernel was intended
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
    morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
    morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)

    # split the image into channels
    image_channels = np.split(np.asarray(morph), 3, axis=2)
    channel_height, channel_width, _ = image_channels[0].shape

    # apply Otsu threshold to each (inverted) channel
    for i, channel in enumerate(image_channels):
        _, binary = cv2.threshold(
            ~channel, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY
        )
        # restore the trailing channel axis so the parts can be merged
        image_channels[i] = np.reshape(
            binary, (channel_height, channel_width, 1)
        )

    # merge the channels
    return np.concatenate(image_channels, axis=2)


# TODO: Implement this function
def binarize_image(image: np.array) -> np.array:
    """
    Convert the image into a black and white image

    Not implemented yet: the body is a placeholder and returns None.

    Parameters
    ----------
    image : np.array
        The cv2 image to convert to binary

    Returns
    -------
    np.array
        The processed cv2 image
    """
    pass


def morph_close(image: np.array, kernal_size: int = 5) -> np.array:
    """
    Apply a morphological closing to the image

    Parameters
    ----------
    image : np.array
        The cv2 image to process
    kernal_size : int
        Width and height of the elliptical kernel, 5 by default

    Returns
    -------
    np.array
        The processed cv2 image
    """
    ellipse = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE, (kernal_size,) * 2
    )
    return cv2.morphologyEx(image, cv2.MORPH_CLOSE, ellipse)


def morph_dilate(image: np.array, kernal_size: int = 5) -> np.array:
    """
    Apply a morphological dilation to the image

    Parameters
    ----------
    image : np.array
        The cv2 image to process
    kernal_size : int
        Width and height of the elliptical kernel, 5 by default

    Returns
    -------
    np.array
        The processed cv2 image
    """
    kernel = cv2.getStructuringElement(
        cv2.MORPH_ELLIPSE, (kernal_size, kernal_size)
    )
    processed_img = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel)

    return processed_img


# TODO: Implement this function
def inlarge_image(image: np.array, configs: list = None) -> np.array:
    """
    Inlarge the size of the image. Does NOT support scaling down of image

    Not implemented yet: the body is a placeholder and returns None.

    Parameters
    ----------
    image : np.array
        The cv2 image to inlarge
    configs : list
        Unused. Optional so that both the one and the two argument call
        forms are accepted

    Returns
    -------
    np.array
        The inlarged cv2 image
    """
    pass


def crop_image(image, box: list):
    """
    Cut the region described by box out of the image

    Parameters
    ----------
    image
        The image to crop; it is indexed as image[rows, columns]
    box : list
        Four [x, y] corners in the order top left, top right, bottom right,
        bottom left. Only the top left corner, the x of the top right
        corner and the y of the bottom left corner are read

    Returns
    -------
    The slice image[y1:y2, x1:x2] of the image
    """
    top_left, top_right, _, bottom_left = box
    x1, y1 = int(top_left[0]), int(top_left[1])
    x2, y2 = int(top_right[0]), int(bottom_left[1])

    return image[y1:y2, x1:x2]


def get_text_dimensions(text_string, font):
    """
    Measure the rendered size of text_string for the given font

    Parameters
    ----------
    text_string
        The text to measure
    font
        Font object providing getmetrics() and getmask() (presumably a PIL
        ImageFont - confirm against callers)

    Returns
    -------
    list
        [width, height], where width is the right edge of the text mask
        bounding box and height is its bottom edge plus the font descent
    """
    _, descent = font.getmetrics()

    # Render the mask once and reuse its bounding box for both dimensions
    bbox = font.getmask(text_string).getbbox()

    return [bbox[2], bbox[3] + descent]

# TODO: Implement this function
def shrink_image(image: np.array, configs: list = None) -> np.array:
    """
    Shrink the size of the image. Does NOT support scaling up of image

    Not implemented yet: the body is a placeholder and returns None.

    Parameters
    ----------
    image : np.array
        The cv2 image to shrink
    configs : list
        Unused. Optional so that both the one and the two argument call
        forms are accepted

    Returns
    -------
    np.array
        The shrinked cv2 image
    """
    pass


def needs_vertical_layout(
    string: str, width: int, height: int, font_type, padding
) -> (bool, bool):
    """
    Decide whether the text should be laid out vertically in its box

    Parameters
    ----------
    string : str
        The text that is going to be drawn
    width : int
        Width of the target box
    height : int
        Height of the target box
    font_type
        Font object handed to get_text_dimensions
    padding
        Padding subtracted on both sides of the box width and added to
        every line height

    Returns
    -------
    (bool, bool)
        First value is True when a vertical layout is needed, second value
        is True when the padding should be dropped instead
    """
    MAXIMUM_WIDTH_MULTIPLIER = 1.5
    longest_word = strutil.get_longest_word_in_string(string)
    text_width, text_height = get_text_dimensions(longest_word, font_type)

    # NOTE(review): after this line text_width is a ratio of box width to
    # word width, yet it is compared against width below - confirm intent
    text_width = math.floor((width - padding * 2) / text_width)
    # NOTE(review): wraps at `width` characters although width looks like a
    # pixel measure - confirm intent
    content = textwrap.wrap(string, width=width, break_long_words=False)

    total_text_height = (text_height + padding) * len(content)
    y_offset = math.floor((height - total_text_height) / 2)

    if text_width > width * MAXIMUM_WIDTH_MULTIPLIER and height > width:
        print("first case Vertical Layout needed")
        return True, False
    if y_offset < 0 and height > width:
        print("main case? Vertical Layout needed")
        return True, False
    if text_width > width and text_width <= width * MAXIMUM_WIDTH_MULTIPLIER:
        return False, True
    # default_padding=2, 2*padding=4,
    # TODO: remove magic number and make it a constant somewhere
    if text_width >= width and text_width - 4 <= width:
        return False, True

    return False, False

# TODO: Use a Point datatype for start_pos and end_pos
def replace_image_section(
    image: np.array, text_image: np.array, start_pos: list
) -> np.array:
    """
    Replace a section of the image with text_image. The coordinates of the
    start position are provided by start_pos

    The image and text_image must be of same type.
    The start_pos must refer to point in image and in range of image dimension

    Parameters
    ----------
    image : np.array
        The cv2 image which is going to have its section replaced
    text_image : np.array
        The cv2 image which is image to replace with
    start_pos : list
        The starting [x, y] position to replace in image. A box of four
        [x, y] corners (top left first) is accepted as well, in which case
        its top left corner is used

    Returns
    -------
    np.array
        The processed cv2 image; this is the same object as image, which is
        modified in place
    """
    # A nested sequence is a four corner box: start from its first corner
    if np.ndim(start_pos) == 2:
        start_pos = start_pos[0]

    x_offset = int(start_pos[0])
    y_offset = int(start_pos[1])
    text_image_height, text_image_width = text_image.shape[:2]

    image[
        y_offset : y_offset + text_image_height,
        x_offset : x_offset + text_image_width,
    ] = text_image

    return image


def calculate_width(string: str) -> int:
    """Return a fixed width of 15; the string argument is not used."""
    return 15


def insert_text(translated_text, image, box, font_size) -> "Image.Image":
    """
    Draw translated_text onto the image inside the given box

    Parameters
    ----------
    translated_text
        The text to draw
    image
        The image to draw on, handed over to insert_text_to_pil_img
    box
        Box whose width, height and top left corner are read through the
        imgutil helpers
    font_size
        Font size forwarded to insert_text_to_pil_img

    Returns
    -------
    The image returned by insert_text_to_pil_img
    """
    image_width = imgutil.calculate_box_width(box)
    image_height = imgutil.calculate_box_height(box)
    top_left, _, _, _ = imgutil.unpack_box(box)

    return insert_text_to_pil_img(
        translated_text,
        image,
        image_width,
        image_height,
        top_left,
        font_size=font_size,
    )


def insert_text_to_pil_img(
    translated_text,
    image,
    image_width,
    image_height,
    start_point,
    font_size=12,
    padding=2,
) -> "Image.Image":
    """
    Draw wrapped, centred text into a region of a PIL image

    By default the text is laid out horizontally. When
    needs_vertical_layout asks for a vertical layout, the image is rotated
    by 90 degrees, the text is drawn with width and height swapped, and the
    result is rotated back.

    Parameters
    ----------
    translated_text
        The text to draw
    image
        The PIL image to draw on
    image_width
        Width of the region to fill
    image_height
        Height of the region to fill
    start_point
        (x, y) of the top left corner of the region
    font_size
        Size used to load the font, 12 by default
    padding
        Padding around the text and between lines, 2 by default

    Returns
    -------
    The PIL image with the text drawn on it
    """
    # TODO: we need to export the following default properties outside of
    # this file so we can change it/add more later accordingly
    font_type = "utilities/fonts/Wild-Words-Roman.ttf"
    font_color = "#000"

    print(f"dimensions are {image_width}x{image_height}")
    (x, y) = start_point
    print(f"x is {x}, y is {y}")

    # first make sure the translated_text doesnt have unnecessary new lines
    translated_text = strutil.remove_trailing_whitespace(translated_text)
    text_font = ImageFont.truetype(font_type, font_size)
    is_vertical_layout_needed, remove_padding = needs_vertical_layout(
        translated_text, image_width, image_height, text_font, padding=padding
    )

    if is_vertical_layout_needed:
        # flip the image and call the same function with swapped dimensions
        # NOTE(review): start_point is reused unchanged on the rotated
        # image - confirm the coordinates are still meant to apply
        rotated_image = image.rotate(90, expand=True)
        rotated_image = insert_text_to_pil_img(
            translated_text,
            rotated_image,
            image_height,
            image_width,
            start_point,
            font_size=font_size,
            padding=padding,
        )
        return rotated_image.rotate(270, expand=True)

    drawing_img = ImageDraw.Draw(image)

    if remove_padding:
        padding = 0

    # "W" is measured to estimate how many characters fit on one line
    glyph_width, text_height = get_text_dimensions("W", text_font)
    print(f"img width={image_width}, height={image_height}")
    print(f"text width={glyph_width}, height={text_height}")

    # textwrap rejects a width below 1, so never go under one character
    chars_per_line = max(
        1, math.ceil((image_width - padding * 2) / glyph_width)
    )
    print(f"updated text width={chars_per_line}, height={text_height}")

    content = textwrap.wrap(
        translated_text, width=chars_per_line, break_long_words=False
    )

    # centre the block of lines vertically inside the region
    total_text_height = (text_height + padding) * len(content)
    y_offset = math.floor((image_height - total_text_height) / 2)
    print("y offset is ", y_offset)

    for line in content:
        line_width, line_height = get_text_dimensions(line, text_font)

        # centre every line horizontally inside the region
        x_offset = math.floor((image_width - line_width - padding * 2) / 2)

        drawing_img.text(
            (x + x_offset, y + y_offset),
            line,
            font=text_font,
            fill=font_color,
            stroke_width=2,
            stroke_fill="white",
        )
        y_offset += line_height + padding

    return image

0 comments on commit df97fb7

Please sign in to comment.