core image documentation

maeriil · Oct 13, 2023 · df97fb7 · df97fb7
1 parent a68286e
commit df97fb7
Showing 1 changed file with 110 additions and 236 deletions.
diff --git a/backend/modules/coreimage.py b/backend/modules/coreimage.py
@@ -1,287 +1,161 @@
-import utilities.helpers.imageHelpers as imgutil
-import utilities.helpers.stringHelpers as strutil
+"""
+This module, coreimage, contains multiple image processing functions which
+can be used to process on the images. All the images are assumed to be of
+cv2 image format. 
+
+"""
 
 import cv2
 import numpy as np
-import textwrap
 import math
 
-from PIL import Image, ImageFont, ImageDraw
-
 
+# TODO: The 7.5 is the current number that works best for our use cases, but
+# this needs to be removed and replaced by more logical
 def calc_max_gap_dist(image: np.array) -> int:
-    height, width, channel = image.shape
+    """
+    This is the maximum distance (in pixels) to merge two bounding boxes
+    This current 7.5 pixels is the maximum gap distance for image with
+    1000 pixels height
+
+    Parameters
+    ----------
+    image : np.array
+        cv2 image to calculate gap distance
+
+    Returns
+    -------
+    int
+        Maximum gap distance in integer rounded down
+
+    """
+
+    height, _, _ = image.shape
     return math.floor((7.5 * height) / 1000)
 
 
+# TODO: Support being able to pass the kernel size. Currently the size
+# is already hardcoded in the code
 def sharpen_image(image: np.array) -> np.array:
-    """ """
-    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    """
+    Applies gaussian blur to the image in order to sharpen and remove noise
 
-    # Add blur to the image
-    # extract the gaussian kernal to global scope and make it a constant
-    image = cv2.GaussianBlur(image, (9, 9), 0)
+    Parameters
+    ----------
+    image : np.array
+        The cv2 image to process
+
+    Returns
+    -------
+    np.array
+        The processed cv2 image
+
+    """
 
-    # use threshold to do TODO: WHAT?
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    image = cv2.GaussianBlur(image, (9, 9), 0)
     image = cv2.threshold(
         image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
     )[1]
-
-    # Attempt to remove noise from the image
-    # extract the kernal matrix to global scope and make it a constant
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
-
-    # What does this do? TODO
     image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel, iterations=1)
 
-    # Invert the image colours
-    # image = 255 - image
-
     return image
 
 
-def temp_test(im):
-    # smooth the image with alternative closing and opening
-    # with an enlarging kernel
-    morph = im.copy()
-
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
-    morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
-    morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)
-
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
-
-    # take morphological gradient
-    # gradient_image = cv2.morphologyEx(morph, cv2.MORPH_GRADIENT, kernel)
-    gradient_image = morph
-
-    # split the gradient image into channels
-    image_channels = np.split(np.asarray(gradient_image), 3, axis=2)
-
-    channel_height, channel_width, _ = image_channels[0].shape
-
-    # apply Otsu threshold to each channel
-    for i in range(0, 3):
-        _, image_channels[i] = cv2.threshold(
-            ~image_channels[i], 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY
-        )
-        image_channels[i] = np.reshape(
-            image_channels[i], newshape=(channel_height, channel_width, 1)
-        )
-
-    # merge the channels
-    image_channels = np.concatenate(
-        (image_channels[0], image_channels[1], image_channels[2]), axis=2
-    )
-    return image_channels
-
-
+# TODO: Implement this function
 def binarize_image(image: np.array) -> np.array:
-    """ """
-    pass
+    """
+    Convert the image into a black and white image
 
+    Parameters
+    ----------
+    image : np.array
+        The cv2 image to convert to binary
 
-def inlarge_image(image: np.array, configs: list) -> np.array:
-    """ """
-    pass
-
+    Returns
+    -------
+    np.array
+        The processed cv2 image
 
-def shrink_image(image: np.array, configs: list) -> np.array:
-    """ """
+    """
     pass
 
 
-def morph_close(image: np.array, kernal_size: int = 5) -> np.array:
-    kernel = cv2.getStructuringElement(
-        cv2.MORPH_ELLIPSE, (kernal_size, kernal_size)
-    )
-    processed_img = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)
-
-    return processed_img
-
-
-def morph_dilate(image: np.array, kernal_size: int = 5) -> np.array:
-    kernel = cv2.getStructuringElement(
-        cv2.MORPH_ELLIPSE, (kernal_size, kernal_size)
-    )
-    processed_img = cv2.morphologyEx(image, cv2.MORPH_DILATE, kernel)
+# TODO: Implement this function
+def inlarge_image(image: np.array) -> np.array:
+    """
+    Inlarge the size of the image. Does NOT support scaling down of image
 
-    return processed_img
+    Parameters
+    ----------
+    image  : np.array
+        The cv2 image to inlarge
 
+    Returns
+    -------
+    np.array
+        The inlarged cv2 image
 
-def crop_image(image, box: list):
-    """ """
-    (top_left, top_right, bottom_right, bottom_left) = box
-    x1, y1, x2, y2 = (
-        int(top_left[0]),
-        int(top_left[1]),
-        int(top_right[0]),
-        int(bottom_left[1]),
-    )
-
-    cropped = image[y1:y2, x1:x2]
-
-    return cropped
-
-
-def get_text_dimensions(text_string, font):
-    ascent, descent = font.getmetrics()
-
-    text_width = font.getmask(text_string).getbbox()[2]
-    text_height = font.getmask(text_string).getbbox()[3] + descent
+    """
+    pass
 
-    return [text_width, text_height]
 
+# TODO: Implement this function
+def shrink_image(image: np.array) -> np.array:
+    """
+    Shrink the size of the image. Does NOT support scaling up of image
 
-def needs_vertical_layout(
-    string: str, width: int, height: int, font_type, padding
-) -> (bool, bool):
-    """"""
-    MAXIMUM_WIDTH_MULTIPLIER = 1.5
-    longest_word = strutil.get_longest_word_in_string(string)
-    text_width, text_height = get_text_dimensions(longest_word, font_type)
+    Parameters
+    ----------
+    image  : np.array
+        The cv2 image to shrink
 
-    text_width = math.floor((width - padding * 2) / text_width)
-    content = textwrap.wrap(string, width=width, break_long_words=False)
+    Returns
+    -------
+    np.array
+        The shrinked cv2 image
 
-    total_text_height = (text_height + padding) * len(content)
-    y_offset = math.floor((height - total_text_height) / 2)
+    """
+    pass
 
-    if text_width > width * MAXIMUM_WIDTH_MULTIPLIER and height > width:
-        print("first case Vertical Layout needed")
-        return True, False
-    if y_offset < 0 and height > width:
-        print("main case? Vertical Layout needed")
-        return True, False
-    elif text_width > width and text_width <= width * MAXIMUM_WIDTH_MULTIPLIER:
-        return False, True
-    #  default_padding=2, 2*padding=4,
-    # TODO: remove magic number and make it a constant somewhere
-    elif text_width >= width and text_width - 4 <= width:
-        return False, True
-    else:
-        return False, False
 
+# TODO: Use a Point datatype for start_pos and end_pos
+def replace_image_section(
+    image: np.array, text_image: np.array, start_pos: list
+) -> np.array:
+    """
+    Replace a section of the image with text_image. The coordinates on the
+    the start position is provided by start_pos
 
-def calculate_width(string: str) -> int:
-    return 15
+    The image and text_image must be of same type.
+    The start_pos must refer to point in image and in range of image dimension
 
+    Parameters
+    ----------
+    image : np.array
+        The cv2 image which is going to have its section replaced
 
-def insert_text(translated_text, image, box, font_size) -> Image.Image:
-    image_width = imgutil.calculate_box_width(box)
-    image_height = imgutil.calculate_box_height(box)
-    top_left, _, _, _ = imgutil.unpack_box(box)
+    text_image : np.array
+        The cv2 image which is image to replace with
 
-    image = insert_text_to_pil_img(
-        translated_text,
-        image,
-        image_width,
-        image_height,
-        top_left,
-        font_size=font_size,
-    )
+    start_pos : list
+        The starting [x, y] position to replace in image
 
-    return image
+    Returns
+    -------
+        The processed cv2 image
 
+    """
+    text_image_height = text_image.shape[0]
+    text_image_width = text_image.shape[1]
 
-def insert_text_to_pil_img(
-    translated_text,
-    image,
-    image_width,
-    image_height,
-    start_point,
-    font_size=12,
-    padding=2,
-) -> Image.Image:
-    # TODO: we need to export the following default properties outside of
-    # this file so we can change it/add more later accordingly
-    print(f"dimensions are {image_width}x{image_height}")
-    (x, y) = start_point
-    font_type = "utilities/fonts/Wild-Words-Roman.ttf"
-    font_color = "#000"
-    font_thickness = 1
-
-    print(f"x is {x}, y is {y}")
-
-    # first make sure the translated_text doesnt have unnecessary new lines
-    translated_text = strutil.remove_trailing_whitespace(translated_text)
-    text_font = ImageFont.truetype(font_type, font_size)
-    is_vertical_layout_needed, remove_padding = needs_vertical_layout(
-        translated_text, image_width, image_height, text_font, padding=padding
-    )
-
-    # determine which layout we will save the text as
-    # There are two ways we can save text, horizontally or vertically
-    # By default we prefer to use horizontal layout. However, if our bounding
-    # box is very tall but very small width, we will convert it to vertical
-    # text. Each line will only contain a single character.
-
-    # flip the image and call the same function
-    if is_vertical_layout_needed:
-        rotated_image = image.rotate(90, expand=True)
-        new_rotated_image = insert_text_to_pil_img(
-            translated_text,
-            rotated_image,
-            image_height,
-            image_width,
-            start_point,
-            font_size=font_size,
-        )
-        image = new_rotated_image.rotate(270, expand=True)
-    else:
-        drawing_img = ImageDraw.Draw(image)
-
-        if remove_padding:
-            padding = 0
-
-        text_width, text_height = get_text_dimensions("W", text_font)
-        print(f"img width={image_width}, height={image_height}")
-        print(f"text width={text_width}, height={text_height}")
-
-        text_width = math.ceil((image_width - padding * 2) / text_width)
-        print(f"updated text width={text_width}, height={text_height}")
-
-        content = textwrap.wrap(
-            translated_text, width=text_width, break_long_words=False
-        )
-
-        total_text_height = (text_height + padding) * len(content)
-        y_offset = math.floor((image_height - total_text_height) / 2)
-        print("y offset is ", y_offset)
-
-        for line in content:
-            text_box = drawing_img.textbbox((x, y), line, text_font)
-            line_width, line_height = get_text_dimensions(line, text_font)
-
-            # line_width = imgutil.calculate_width(text_box[2], text_box[0])
-            # line_height = imgutil.calculate_height(text_box[3], text_box[1])
-
-            x_offset = math.floor((image_width - line_width - padding * 2) / 2)
-
-            drawing_img.text(
-                (x + x_offset, y + y_offset),
-                line,
-                font=text_font,
-                fill="#000",
-                stroke_width=2,
-                stroke_fill="white",
-            )
-            y_offset += line_height + padding
-
-    # convert back to cv2 image and return it
-    return image
-
+    x_offset = int(start_pos[0])
+    y_offset = int(start_pos[1])
 
-def replace_image_section(image, text_image, box):
-    """ """
-    (top_left, top_right, bottom_right, bottom_left) = box
-    dimension = (top_right[0] - top_left[0], bottom_right[1] - top_right[1])
-    (x_offset, y_offset) = top_left
-    x_offset = int(x_offset)
-    y_offset = int(y_offset)
     image[
-        y_offset : y_offset + text_image.shape[0],
-        x_offset : x_offset + text_image.shape[1],
+        y_offset : y_offset + text_image_height,
+        x_offset : x_offset + text_image_width,
     ] = text_image
 
     return image