Skip to content

Commit

Permalink
[save-images] Add multithreaded version of save-images
Browse files Browse the repository at this point in the history
This improves performance by over 50% in some cases.

This should also fix #450, since image files are now written through
`pathlib.Path` instead of OpenCV's `imwrite`.
  • Loading branch information
Breakthrough committed Nov 10, 2024
1 parent 64f1bae commit 54c0a00
Show file tree
Hide file tree
Showing 3 changed files with 265 additions and 5 deletions.
2 changes: 1 addition & 1 deletion scenedetect/_cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1435,7 +1435,7 @@ def save_images_command(
"num_images": ctx.config.get_value("save-images", "num-images", num_images),
"output_dir": output,
"scale": scale,
"show_progress": ctx.quiet_mode,
"show_progress": not ctx.quiet_mode,
"width": width,
}
ctx.add_command(cli_commands.save_images, save_images_args)
Expand Down
2 changes: 1 addition & 1 deletion scenedetect/_cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
write_scene_list_html,
)
from scenedetect.scene_manager import (
save_images as save_images_impl,
save_images_mt as save_images_impl,
)
from scenedetect.video_splitter import split_video_ffmpeg, split_video_mkvmerge

Expand Down
266 changes: 263 additions & 3 deletions scenedetect/scene_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ def on_new_scene(frame_img: numpy.ndarray, frame_num: int):
import sys
import threading
from enum import Enum
from typing import Callable, Dict, Iterable, List, Optional, TextIO, Tuple, Union
from pathlib import Path
from typing import Callable, Dict, List, Optional, TextIO, Tuple, Union

import cv2
import numpy as np
Expand Down Expand Up @@ -392,8 +393,39 @@ def write_scene_list_html(
page.save(output_html_filename)


def _scale_image(
    image: cv2.Mat,
    height: Optional[int],
    width: Optional[int],
    scale: Optional[float],
    aspect_ratio: Optional[float],
    interpolation: Interpolation,
) -> cv2.Mat:
    """Resize `image` according to the requested output size or scale factor.

    Arguments:
        image: Image to resize. `image.shape[0]` is used as its height and
            `image.shape[1]` as its width.
        height: Target height in pixels. If only `height` is given, the width is derived so
            that the image proportions are preserved.
        width: Target width in pixels. If only `width` is given, the height is derived so
            that the image proportions are preserved.
        scale: Uniform scale factor applied to both axes. Only used when neither `height`
            nor `width` is set.
        aspect_ratio: If not None, the image is first stretched horizontally by this factor
            (the vertical axis is left untouched) before any other resizing is done.
        interpolation: Interpolation method; its `.value` is passed to `cv2.resize`.

    Returns:
        The resized image, or the input image unchanged if no resizing was requested.

    Raises:
        ValueError: If `height` or `width` is negative.
    """
    # TODO: Combine this resize with the ones below.
    if aspect_ratio is not None:
        image = cv2.resize(
            image, (0, 0), fx=aspect_ratio, fy=1.0, interpolation=interpolation.value
        )

    if height or width:
        # Validate explicitly rather than with `assert`, which is stripped under `python -O`.
        if (height or 0) < 0 or (width or 0) < 0:
            raise ValueError("height and width must be positive when specified.")
        image_height = image.shape[0]
        image_width = image.shape[1]
        # When only one dimension is given, derive the other one from the image proportions.
        # Truncation can yield 0 for extreme aspect ratios (e.g. a very wide image scaled to
        # a small width), which `cv2.resize` rejects, so clamp to at least one pixel.
        if height and not width:
            factor = height / float(image_height)
            width = max(1, int(factor * image_width))
        elif width and not height:
            factor = width / float(image_width)
            height = max(1, int(factor * image_height))
        image = cv2.resize(image, (width, height), interpolation=interpolation.value)
    elif scale:
        image = cv2.resize(image, (0, 0), fx=scale, fy=scale, interpolation=interpolation.value)
    return image


#
# TODO(v1.0): Consider moving all post-processing functionality into a separate submodule.
# TODO(v1.0): Move post-processing functions into separate submodule.
def save_images(
scene_list: SceneList,
video: VideoStream,
Expand Down Expand Up @@ -479,7 +511,9 @@ def save_images(

# Setup flags and init progress bar if available.
completed = True
logger.info(f"Saving {num_images} images per scene to {output_dir}, format {image_extension}")
logger.info(
f"Saving {num_images} images per scene [format={image_extension}] {output_dir if output_dir else ''} "
)
progress_bar = None
if show_progress:
progress_bar = tqdm(total=len(scene_list) * num_images, unit="images", dynamic_ncols=True)
Expand Down Expand Up @@ -597,6 +631,232 @@ def save_images(
return image_filenames


def save_images_mt(
    scene_list: SceneList,
    video: VideoStream,
    num_images: int = 3,
    frame_margin: int = 1,
    image_extension: str = "jpg",
    encoder_param: int = 95,
    image_name_template: str = "$VIDEO_NAME-Scene-$SCENE_NUMBER-$IMAGE_NUMBER",
    output_dir: Optional[str] = None,
    show_progress: Optional[bool] = False,
    scale: Optional[float] = None,
    height: Optional[int] = None,
    width: Optional[int] = None,
    interpolation: Interpolation = Interpolation.CUBIC,
    video_manager=None,
) -> Dict[int, List[str]]:
    """Save a set number of images from each scene, using worker threads for encoding/saving.

    Frames are read on the calling thread and handed to an encoder thread (resize + encode),
    which in turn hands the encoded buffers to a writer thread (file output). Both hand-offs
    go through small bounded queues so only a few frames are held in memory at a time.

    Arguments:
        scene_list: A list of scenes (pairs of FrameTimecode objects) returned
            from calling a SceneManager's detect_scenes() method.
        video: A VideoStream object corresponding to the scene list.
            Note that the video will be reset and seeked through.
        num_images: Number of images to generate for each scene. Minimum is 1.
        frame_margin: Number of frames to pad each scene around the beginning
            and end (e.g. moves the first/last image into the scene by N frames).
            Can set to 0, but will result in some video files failing to extract
            the very last frame.
        image_extension: Type of image to save (must be one of 'jpg', 'png', or 'webp').
        encoder_param: Quality/compression efficiency, based on type of image:
            'jpg' / 'webp':  Quality 0-100, higher is better quality. 100 is lossless for webp.
            'png': Compression from 1-9, where 9 achieves best filesize but is slower to encode.
        image_name_template: Template to use for naming image files. Can use the template variables
            $VIDEO_NAME, $SCENE_NUMBER, $IMAGE_NUMBER, $TIMECODE, $FRAME_NUMBER, $TIMESTAMP_MS.
            Should not include an extension.
        output_dir: Directory to output the images into. If not set, the output
            is created in the working directory.
        show_progress: If True, shows a tqdm progress bar.
        scale: Optional factor by which to rescale saved images. A scaling factor of 1 would
            not result in rescaling. A value < 1 results in a smaller saved image, while a
            value > 1 results in an image larger than the original. This value is ignored if
            either the height or width values are specified.
        height: Optional value for the height of the saved images. Specifying both the height
            and width will resize images to an exact size, regardless of aspect ratio.
            Specifying only height will rescale the image to that number of pixels in height
            while preserving the aspect ratio.
        width: Optional value for the width of the saved images. Specifying both the width
            and height will resize images to an exact size, regardless of aspect ratio.
            Specifying only width will rescale the image to that number of pixels wide
            while preserving the aspect ratio.
        interpolation: Type of interpolation to use when resizing images.
        video_manager: [DEPRECATED] DO NOT USE. For backwards compatibility only.

    Returns:
        Dictionary of the format { scene_index : [image_names] }, where scene_index is the
        0-based index of the scene in scene_list, and image_names is the list of file names
        (template result plus extension, without `output_dir`) queued for that scene.

    Raises:
        ValueError: Raised if any arguments are invalid or out of range (e.g.
            if num_images is negative).
    """
    # TODO(v0.7): Add DeprecationWarning that `video_manager` will be removed in v0.8.
    if video_manager is not None:
        logger.error("`video_manager` argument is deprecated, use `video` instead.")
        video = video_manager

    if not scene_list:
        return {}
    if num_images <= 0 or frame_margin < 0:
        raise ValueError("num_images must be at least 1 and frame_margin must not be negative.")

    # TODO: Validate that encoder_param is within the proper range.
    # Should be between 0 and 100 (inclusive) for jpg/webp, and 1-9 for png.
    imwrite_param = (
        [get_cv2_imwrite_params()[image_extension], encoder_param]
        if encoder_param is not None
        else []
    )

    video.reset()

    # Setup flags and init progress bar if available.
    completed = True
    logger.info(
        f"Saving {num_images} images per scene [format={image_extension}] {output_dir if output_dir else ''} "
    )
    progress_bar = None
    if show_progress:
        progress_bar = tqdm(total=len(scene_list) * num_images, unit="images", dynamic_ncols=True)

    filename_template = Template(image_name_template)

    # Zero-padded number formats: at least 3 digits for scenes, and one digit more than
    # strictly required for images.
    scene_num_format = "%0"
    scene_num_format += str(max(3, math.floor(math.log(len(scene_list), 10)) + 1)) + "d"
    image_num_format = "%0"
    image_num_format += str(math.floor(math.log(num_images, 10)) + 2) + "d"

    framerate = scene_list[0][0].framerate

    # For every scene, split its frame range into `num_images` chunks and pick one frame from
    # each: the first chunk's start (+margin), the last chunk's end (-margin), and the middle
    # frame of every chunk in between.
    # TODO(v1.0): Split up into multiple sub-expressions so auto-formatter works correctly.
    timecode_list = [
        [
            FrameTimecode(int(f), fps=framerate)
            for f in [
                # middle frames
                a[len(a) // 2]
                if (0 < j < num_images - 1) or num_images == 1
                # first frame
                else min(a[0] + frame_margin, a[-1])
                if j == 0
                # last frame
                else max(a[-1] - frame_margin, a[0])
                # for each evenly-split array of frames in the scene list
                for j, a in enumerate(np.array_split(r, num_images))
            ]
        ]
        for i, r in enumerate(
            [
                # pad ranges to number of images
                r if 1 + r[-1] - r[0] >= num_images else list(r) + [r[-1]] * (num_images - len(r))
                # create range of frames in scene
                for r in (
                    range(
                        start.get_frames(),
                        start.get_frames()
                        + max(
                            1,  # guard against zero length scenes
                            end.get_frames() - start.get_frames(),
                        ),
                    )
                    # for each scene in scene list
                    for start, end in scene_list
                )
            ]
        )
    ]

    image_filenames = {i: [] for i in range(len(timecode_list))}
    aspect_ratio = video.aspect_ratio
    # Treat (nearly) square pixels as "no correction needed".
    if abs(aspect_ratio - 1.0) < 0.01:
        aspect_ratio = None

    logger.debug("Writing images with template %s", filename_template.template)

    MAX_QUEUED_ENCODE_FRAMES = 4
    MAX_QUEUED_SAVE_IMAGES = 4

    encode_queue = queue.Queue(MAX_QUEUED_ENCODE_FRAMES)
    save_queue = queue.Queue(MAX_QUEUED_SAVE_IMAGES)
    # Set by either worker when an image could not be encoded or written.
    worker_failed = threading.Event()

    def image_encode_thread(
        encode_queue: queue.Queue, save_queue: queue.Queue, image_extension: str
    ):
        """Resize and encode queued frames until the `(None, None)` sentinel is received."""
        while True:
            frame_im, dest_path = encode_queue.get()
            if frame_im is None:
                return
            try:
                frame_im = _scale_image(frame_im, height, width, scale, aspect_ratio, interpolation)
                (is_ok, encoded) = cv2.imencode(f".{image_extension}", frame_im, imwrite_param)
            except Exception:
                # Keep draining the queue: if this thread died, the producer would block
                # forever once the bounded queue fills up.
                logger.exception("Failed to encode image: %s", dest_path)
                worker_failed.set()
                continue
            if not is_ok:
                logger.error("Failed to encode image: %s", dest_path)
                worker_failed.set()
                continue
            save_queue.put((encoded, dest_path))

    def save_files_thread(save_queue: queue.Queue, progress_bar: tqdm):
        """Write queued encoded images to disk until the `(None, None)` sentinel is received."""
        while True:
            encoded, dest_path = save_queue.get()
            if encoded is None:
                return
            try:
                encoded.tofile(Path(dest_path))
            except OSError:
                # Same reasoning as above: keep consuming so the encoder never blocks.
                logger.exception("Failed to write image: %s", dest_path)
                worker_failed.set()
            if progress_bar is not None:
                progress_bar.update(1)

    encode_thread = threading.Thread(
        target=image_encode_thread,
        args=(encode_queue, save_queue, image_extension),
        daemon=True,
    )
    save_thread = threading.Thread(
        target=save_files_thread,
        args=(save_queue, progress_bar),
        daemon=True,
    )
    encode_thread.start()
    save_thread.start()
    try:
        for i, scene_timecodes in enumerate(timecode_list):
            for j, image_timecode in enumerate(scene_timecodes):
                video.seek(image_timecode)
                frame_im = video.read()
                if frame_im is not None and frame_im is not False:
                    # TODO: Add extension to template.
                    # TODO: Allow NUM to be a valid suffix in addition to NUMBER.
                    file_path = "%s.%s" % (
                        filename_template.safe_substitute(
                            VIDEO_NAME=video.name,
                            SCENE_NUMBER=scene_num_format % (i + 1),
                            IMAGE_NUMBER=image_num_format % (j + 1),
                            FRAME_NUMBER=image_timecode.get_frames(),
                            TIMESTAMP_MS=int(image_timecode.get_seconds() * 1000),
                            TIMECODE=image_timecode.get_timecode().replace(":", ";"),
                        ),
                        image_extension,
                    )
                    image_filenames[i].append(file_path)
                    encode_queue.put((frame_im, get_and_create_path(file_path, output_dir)))
                else:
                    # Give up on the remaining images of this scene, but keep going with
                    # the following scenes.
                    completed = False
                    break
    finally:
        # Shut the pipeline down stage by stage. The writer must only be told to stop once
        # the encoder has finished; otherwise images still being encoded would be queued
        # behind the writer's sentinel and never be written (or block the encoder forever).
        encode_queue.put((None, None))
        encode_thread.join()
        save_queue.put((None, None))
        save_thread.join()

        if progress_bar is not None:
            progress_bar.close()

    if worker_failed.is_set():
        completed = False

    if not completed:
        logger.error("Could not generate all output images.")

    return image_filenames


##
## SceneManager Class Implementation
##
Expand Down

0 comments on commit 54c0a00

Please sign in to comment.