[save-images] Add multithreaded version of save-images

This improves performance by over 50% in some cases. It should also fix #450, since images are now encoded in memory with `cv2.imencode` and written to disk through a `pathlib.Path` destination instead of OpenCV's `imwrite`.
Breakthrough · Nov 10, 2024 · 54c0a00 · 54c0a00
1 parent 64f1bae
commit 54c0a00
Show file tree

Hide file tree

Showing 3 changed files with 265 additions and 5 deletions.
diff --git a/scenedetect/_cli/__init__.py b/scenedetect/_cli/__init__.py
@@ -1435,7 +1435,7 @@ def save_images_command(
         "num_images": ctx.config.get_value("save-images", "num-images", num_images),
         "output_dir": output,
         "scale": scale,
-        "show_progress": ctx.quiet_mode,
+        "show_progress": not ctx.quiet_mode,
         "width": width,
     }
     ctx.add_command(cli_commands.save_images, save_images_args)

diff --git a/scenedetect/_cli/commands.py b/scenedetect/_cli/commands.py
@@ -29,7 +29,7 @@
     write_scene_list_html,
 )
 from scenedetect.scene_manager import (
-    save_images as save_images_impl,
+    save_images_mt as save_images_impl,
 )
 from scenedetect.video_splitter import split_video_ffmpeg, split_video_mkvmerge
 

diff --git a/scenedetect/scene_manager.py b/scenedetect/scene_manager.py
@@ -86,7 +86,8 @@ def on_new_scene(frame_img: numpy.ndarray, frame_num: int):
 import sys
 import threading
 from enum import Enum
-from typing import Callable, Dict, Iterable, List, Optional, TextIO, Tuple, Union
+from pathlib import Path
+from typing import Callable, Dict, List, Optional, TextIO, Tuple, Union
 
 import cv2
 import numpy as np
@@ -392,8 +393,39 @@ def write_scene_list_html(
     page.save(output_html_filename)
 
 
+def _scale_image(
+    image: np.ndarray,
+    height: Optional[int],
+    width: Optional[int],
+    scale: Optional[float],
+    aspect_ratio: Optional[float],
+    interpolation: Interpolation,
+) -> np.ndarray:
+    """Resize `image` according to the requested output dimensions.
+
+    Arguments:
+        image: Frame to resize. Rows are read from `shape[0]` and columns from `shape[1]`.
+        height: Target height in pixels. If only `height` is set, the width is derived so
+            that the image proportions are preserved.
+        width: Target width in pixels. If only `width` is set, the height is derived so
+            that the image proportions are preserved.
+        scale: Uniform scale factor. Only used when neither `height` nor `width` is set.
+        aspect_ratio: Horizontal stretch factor applied before any other resizing, or
+            None to skip that step.
+        interpolation: Interpolation method; its `value` is passed to `cv2.resize`.
+
+    Returns:
+        The resized image, or `image` unchanged if no resizing was requested.
+
+    Raises:
+        ValueError: If the resulting `height` or `width` is not positive.
+    """
+    # NOTE: Annotated with `np.ndarray` rather than `cv2.Mat`, since annotations are evaluated
+    # when the function is defined and `cv2.Mat` is not available in every OpenCV build.
+    # TODO: Combine this resize with the ones below.
+    if aspect_ratio is not None:
+        image = cv2.resize(
+            image, (0, 0), fx=aspect_ratio, fy=1.0, interpolation=interpolation.value
+        )
+    image_height = image.shape[0]
+    image_width = image.shape[1]
+
+    # Explicit dimensions take precedence over the uniform scale factor.
+    if height or width:
+        if height and not width:
+            factor = height / float(image_height)
+            # Clamp so truncation can never produce a zero-sized dimension.
+            width = max(1, int(factor * image_width))
+        elif width and not height:
+            factor = width / float(image_width)
+            height = max(1, int(factor * image_height))
+        # Raise explicitly instead of asserting: asserts are stripped under `python -O`.
+        if height <= 0 or width <= 0:
+            raise ValueError("height and width must be positive when specified.")
+        image = cv2.resize(image, (width, height), interpolation=interpolation.value)
+    elif scale:
+        image = cv2.resize(image, (0, 0), fx=scale, fy=scale, interpolation=interpolation.value)
+    return image
+
+
 #
-# TODO(v1.0): Consider moving all post-processing functionality into a separate submodule.
+# TODO(v1.0): Move post-processing functions into separate submodule.
 def save_images(
     scene_list: SceneList,
     video: VideoStream,
@@ -479,7 +511,9 @@ def save_images(
 
     # Setup flags and init progress bar if available.
     completed = True
-    logger.info(f"Saving {num_images} images per scene to {output_dir}, format {image_extension}")
+    logger.info(
+        f"Saving {num_images} images per scene [format={image_extension}] {output_dir if output_dir else ''} "
+    )
     progress_bar = None
     if show_progress:
         progress_bar = tqdm(total=len(scene_list) * num_images, unit="images", dynamic_ncols=True)
@@ -597,6 +631,232 @@ def save_images(
     return image_filenames
 
 
+def save_images_mt(
+    scene_list: SceneList,
+    video: VideoStream,
+    num_images: int = 3,
+    frame_margin: int = 1,
+    image_extension: str = "jpg",
+    encoder_param: int = 95,
+    image_name_template: str = "$VIDEO_NAME-Scene-$SCENE_NUMBER-$IMAGE_NUMBER",
+    output_dir: Optional[str] = None,
+    show_progress: Optional[bool] = False,
+    scale: Optional[float] = None,
+    height: Optional[int] = None,
+    width: Optional[int] = None,
+    interpolation: Interpolation = Interpolation.CUBIC,
+    video_manager=None,
+) -> Dict[int, List[str]]:
+    """Save a set number of images from each scene, given a list of scenes
+    and the associated video/frame source.
+
+    Frames are read on the calling thread, then encoded and written to disk by two
+    background threads connected through bounded queues.
+
+    Arguments:
+        scene_list: A list of scenes (pairs of FrameTimecode objects) returned
+            from calling a SceneManager's detect_scenes() method.
+        video: A VideoStream object corresponding to the scene list.
+            Note that the video will be reset and seeked through.
+        num_images: Number of images to generate for each scene.  Minimum is 1.
+        frame_margin: Number of frames to pad each scene around the beginning
+            and end (e.g. moves the first/last image into the scene by N frames).
+            Can set to 0, but will result in some video files failing to extract
+            the very last frame.
+        image_extension: Type of image to save (must be one of 'jpg', 'png', or 'webp').
+        encoder_param: Quality/compression efficiency, based on type of image:
+            'jpg' / 'webp':  Quality 0-100, higher is better quality.  100 is lossless for webp.
+            'png': Compression from 1-9, where 9 achieves best filesize but is slower to encode.
+        image_name_template: Template to use for naming image files. Can use the template variables
+            $VIDEO_NAME, $SCENE_NUMBER, $IMAGE_NUMBER, $TIMECODE, $FRAME_NUMBER, $TIMESTAMP_MS.
+            Should not include an extension.
+        output_dir: Directory to output the images into.  If not set, the output
+            is created in the working directory.
+        show_progress: If True, shows a tqdm progress bar.
+        scale: Optional factor by which to rescale saved images. A scaling factor of 1 would
+            not result in rescaling. A value < 1 results in a smaller saved image, while a
+            value > 1 results in an image larger than the original. This value is ignored if
+            either the height or width values are specified.
+        height: Optional value for the height of the saved images. Specifying both the height
+            and width will resize images to an exact size, regardless of aspect ratio.
+            Specifying only height will rescale the image to that number of pixels in height
+            while preserving the aspect ratio.
+        width: Optional value for the width of the saved images. Specifying both the width
+            and height will resize images to an exact size, regardless of aspect ratio.
+            Specifying only width will rescale the image to that number of pixels wide
+            while preserving the aspect ratio.
+        interpolation: Type of interpolation to use when resizing images.
+        video_manager: [DEPRECATED] DO NOT USE. For backwards compatibility only.
+
+    Returns:
+        Dictionary of the format { scene_index : [image_paths] }, where scene_index is the
+        position of the scene in scene_list (starting from 0), and image_paths is a list of
+        the file names generated for that scene. An empty dictionary is returned if
+        scene_list is empty.
+
+    Raises:
+        ValueError: Raised if any arguments are invalid or out of range (e.g.
+        if num_images is less than 1 or frame_margin is negative).
+    """
+    # TODO(v0.7): Add DeprecationWarning that `video_manager` will be removed in v0.8.
+    if video_manager is not None:
+        logger.error("`video_manager` argument is deprecated, use `video` instead.")
+        video = video_manager
+
+    if not scene_list:
+        return {}
+    if num_images <= 0 or frame_margin < 0:
+        raise ValueError("num_images must be at least 1 and frame_margin must not be negative.")
+
+    # TODO: Validate that encoder_param is within the proper range.
+    # Should be between 0 and 100 (inclusive) for jpg/webp, and 1-9 for png.
+    imwrite_param = (
+        [get_cv2_imwrite_params()[image_extension], encoder_param]
+        if encoder_param is not None
+        else []
+    )
+
+    video.reset()
+
+    # Setup flags and init progress bar if available.
+    completed = True
+    logger.info(
+        f"Saving {num_images} images per scene [format={image_extension}] {output_dir if output_dir else ''} "
+    )
+    progress_bar = None
+    if show_progress:
+        progress_bar = tqdm(total=len(scene_list) * num_images, unit="images", dynamic_ncols=True)
+
+    filename_template = Template(image_name_template)
+
+    # Zero-pad scene numbers to at least 3 digits, and image numbers to one digit more
+    # than needed to represent `num_images`.
+    scene_num_format = "%0"
+    scene_num_format += str(max(3, math.floor(math.log(len(scene_list), 10)) + 1)) + "d"
+    image_num_format = "%0"
+    image_num_format += str(math.floor(math.log(num_images, 10)) + 2) + "d"
+
+    framerate = scene_list[0][0].framerate
+
+    # Pick `num_images` frames per scene: split the scene's frame range into `num_images`
+    # evenly sized chunks and take one frame from each.
+    timecode_list = []
+    for start, end in scene_list:
+        first_frame = start.get_frames()
+        # Guard against zero length scenes by always covering at least one frame.
+        frames = range(first_frame, first_frame + max(1, end.get_frames() - first_frame))
+        if len(frames) < num_images:
+            # Pad short scenes by repeating the last frame so every chunk is non-empty.
+            frames = list(frames) + [frames[-1]] * (num_images - len(frames))
+        scene_timecodes = []
+        for j, chunk in enumerate(np.array_split(frames, num_images)):
+            if (0 < j < num_images - 1) or num_images == 1:
+                # Middle images (or the only image): use the centre of the chunk.
+                frame_num = chunk[len(chunk) // 2]
+            elif j == 0:
+                # First image: move into the scene by `frame_margin`, staying in the chunk.
+                frame_num = min(chunk[0] + frame_margin, chunk[-1])
+            else:
+                # Last image: move back from the end by `frame_margin`, staying in the chunk.
+                frame_num = max(chunk[-1] - frame_margin, chunk[0])
+            scene_timecodes.append(FrameTimecode(int(frame_num), fps=framerate))
+        timecode_list.append(scene_timecodes)
+
+    image_filenames = {i: [] for i in range(len(timecode_list))}
+    aspect_ratio = video.aspect_ratio
+    # Skip the aspect ratio correction when pixels are (almost) square.
+    if abs(aspect_ratio - 1.0) < 0.01:
+        aspect_ratio = None
+
+    logger.debug("Writing images with template %s", filename_template.template)
+
+    # Bound both queues so decoded frames cannot pile up in memory if a stage falls behind.
+    MAX_QUEUED_ENCODE_FRAMES = 4
+    MAX_QUEUED_SAVE_IMAGES = 4
+
+    encode_queue = queue.Queue(MAX_QUEUED_ENCODE_FRAMES)
+    save_queue = queue.Queue(MAX_QUEUED_SAVE_IMAGES)
+    # Set by either worker when an image could not be encoded or written.
+    worker_failed = threading.Event()
+
+    def image_encode_thread(
+        encode_queue: queue.Queue, save_queue: queue.Queue, image_extension: str
+    ):
+        """Scale and encode frames from `encode_queue`, forwarding results to `save_queue`."""
+        while True:
+            frame_im, dest_path = encode_queue.get()
+            if frame_im is None:
+                return
+            # `False` tells the writer that this image failed but must still be counted.
+            encoded = False
+            try:
+                frame_im = _scale_image(frame_im, height, width, scale, aspect_ratio, interpolation)
+                (is_ok, buffer) = cv2.imencode(f".{image_extension}", frame_im, imwrite_param)
+                if is_ok:
+                    encoded = buffer
+                else:
+                    logger.error("Failed to encode image: %s", dest_path)
+            except Exception:
+                # Thread boundary: an uncaught error here would kill this worker and leave
+                # the producer blocked forever on a full queue.
+                logger.exception("Failed to encode image: %s", dest_path)
+            if encoded is False:
+                worker_failed.set()
+            save_queue.put((encoded, dest_path))
+
+    def save_files_thread(save_queue: queue.Queue, progress_bar: tqdm):
+        """Write encoded images from `save_queue` to disk and advance the progress bar."""
+        while True:
+            encoded, dest_path = save_queue.get()
+            if encoded is None:
+                return
+            if encoded is not False:
+                try:
+                    encoded.tofile(Path(dest_path))
+                except OSError:
+                    logger.exception("Failed to write image: %s", dest_path)
+                    worker_failed.set()
+            if progress_bar is not None:
+                progress_bar.update(1)
+
+    encode_thread = threading.Thread(
+        target=image_encode_thread,
+        args=(encode_queue, save_queue, image_extension),
+        daemon=True,
+    )
+    save_thread = threading.Thread(
+        target=save_files_thread,
+        args=(save_queue, progress_bar),
+        daemon=True,
+    )
+    encode_thread.start()
+    save_thread.start()
+    try:
+        for i, scene_timecodes in enumerate(timecode_list):
+            for j, image_timecode in enumerate(scene_timecodes):
+                video.seek(image_timecode)
+                frame_im = video.read()
+                if frame_im is not None and frame_im is not False:
+                    # TODO: Add extension to template.
+                    # TODO: Allow NUM to be a valid suffix in addition to NUMBER.
+                    file_path = "%s.%s" % (
+                        filename_template.safe_substitute(
+                            VIDEO_NAME=video.name,
+                            SCENE_NUMBER=scene_num_format % (i + 1),
+                            IMAGE_NUMBER=image_num_format % (j + 1),
+                            FRAME_NUMBER=image_timecode.get_frames(),
+                            TIMESTAMP_MS=int(image_timecode.get_seconds() * 1000),
+                            TIMECODE=image_timecode.get_timecode().replace(":", ";"),
+                        ),
+                        image_extension,
+                    )
+                    image_filenames[i].append(file_path)
+                    encode_queue.put((frame_im, get_and_create_path(file_path, output_dir)))
+                else:
+                    # Give up on the rest of this scene, but keep going with the next one.
+                    completed = False
+                    break
+    finally:
+        # Shut the pipeline down stage by stage. The writer's sentinel must only be queued
+        # once the encoder has finished, otherwise the writer can exit while encoded images
+        # are still on their way, losing them and blocking the encoder on a full queue.
+        encode_queue.put((None, None))
+        encode_thread.join()
+        save_queue.put((None, None))
+        save_thread.join()
+
+        if progress_bar is not None:
+            progress_bar.close()
+
+    if worker_failed.is_set():
+        completed = False
+
+    if not completed:
+        logger.error("Could not generate all output images.")
+
+    return image_filenames
+
+
 ##
 ## SceneManager Class Implementation
 ##