Skip to content

Commit

Permalink
[save-images] Make all threads exception-safe
Browse files Browse the repository at this point in the history
Ensure errors are re-raised safely from worker threads by using non-blocking
puts and monitoring a common error queue.
  • Loading branch information
Breakthrough committed Nov 24, 2024
1 parent 99eee13 commit 55dec7f
Show file tree
Hide file tree
Showing 4 changed files with 337 additions and 379 deletions.
302 changes: 0 additions & 302 deletions scenedetect/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,8 @@
:class:`SceneManager <scenedetect.scene_manager.SceneManager>`.
"""

import math
import queue
import threading
import typing as ty
from dataclasses import dataclass
from logging import getLogger
from pathlib import Path
from string import Template

# OpenCV is a required package, but we don't have it as an explicit dependency since we
# need to support both opencv-python and opencv-python-headless. Include some additional
Expand Down Expand Up @@ -182,299 +176,3 @@ def detect(
if scene_manager.stats_manager is not None:
scene_manager.stats_manager.save_to_csv(csv_file=stats_file_path)
return scene_manager.get_scene_list(start_in_scene=start_in_scene)


# TODO: Just merge these variables into the extractor.
@dataclass
class ImageExtractorConfig:
    """Bundle of settings describing how images are extracted and saved for each scene."""

    num_images: int = 3
    """How many images to produce per scene. Must be at least 1."""

    frame_margin: int = 1
    """How many frames to move the first/last image inwards from the scene boundaries.
    May be 0, but some video files will then fail to yield the very last frame."""

    image_extension: str = "jpg"
    """Output image format: one of 'jpg', 'png', or 'webp'."""

    encoder_param: int = 95
    """Encoder quality/compression setting, interpreted according to the image format:
        'jpg' / 'webp': Quality from 0-100 (higher is better). 100 is lossless for webp.
        'png': Compression from 1-9, where 9 gives the smallest files but encodes slowest."""

    image_name_template: str = "$VIDEO_NAME-Scene-$SCENE_NUMBER-$IMAGE_NUMBER"
    """Naming template for the image files, without the extension. Supported variables:
    $VIDEO_NAME, $SCENE_NUMBER, $IMAGE_NUMBER, $TIMECODE, $FRAME_NUMBER, $TIMESTAMP_MS."""

    scale: ty.Optional[float] = None
    """Optional rescaling factor for saved images. 1 leaves the size unchanged, a value < 1
    shrinks the image, and a value > 1 enlarges it. Ignored when either the height or the
    width is specified."""

    height: ty.Optional[int] = None
    """Optional height of saved images, in pixels. When given alone, the width is derived so
    the aspect ratio is preserved. When given together with the width, images are resized to
    exactly that size regardless of aspect ratio."""

    width: ty.Optional[int] = None
    """Optional width of saved images, in pixels. When given alone, the height is derived so
    the aspect ratio is preserved. When given together with the height, images are resized to
    exactly that size regardless of aspect ratio."""

    interpolation: Interpolation = Interpolation.CUBIC
    """Interpolation method used whenever images are resized."""


class ImageExtractor:
    """Saves a fixed number of images for every scene of a video.

    Frames are read on the calling thread, while encoding and writing to disk each run on
    their own worker thread connected by bounded queues. Instances are *not* thread-safe.
    """

    def __init__(
        self,
        num_images: int = 3,
        frame_margin: int = 1,
        image_extension: str = "jpg",
        encoder_param: int = 95,
        image_name_template: str = "$VIDEO_NAME-Scene-$SCENE_NUMBER-$IMAGE_NUMBER",
        scale: ty.Optional[float] = None,
        height: ty.Optional[int] = None,
        width: ty.Optional[int] = None,
        interpolation: Interpolation = Interpolation.CUBIC,
    ):
        """Helper type to handle saving images for a set of scenes. This object is *not* thread-safe.

        Arguments:
            num_images: Number of images to generate for each scene. Minimum is 1.
            frame_margin: Number of frames to pad each scene around the beginning
                and end (e.g. moves the first/last image into the scene by N frames).
                Can set to 0, but will result in some video files failing to extract
                the very last frame.
            image_extension: Type of image to save (must be one of 'jpg', 'png', or 'webp').
            encoder_param: Quality/compression efficiency, based on type of image:
                'jpg' / 'webp': Quality 0-100, higher is better quality. 100 is lossless for webp.
                'png': Compression from 1-9, where 9 achieves best filesize but is slower to encode.
            image_name_template: Template to use for output filenames. Can use template variables
                $VIDEO_NAME, $SCENE_NUMBER, $IMAGE_NUMBER, $TIMECODE, $FRAME_NUMBER, $TIMESTAMP_MS.
                *NOTE*: Should not include the image extension (set `image_extension` instead).
            scale: Optional factor by which to rescale saved images. A scaling factor of 1 would
                not result in rescaling. A value < 1 results in a smaller saved image, while a
                value > 1 results in an image larger than the original. This value is ignored if
                either the height or width values are specified.
            height: Optional value for the height of the saved images. Specifying both the height
                and width will resize images to an exact size, regardless of aspect ratio.
                Specifying only height will rescale the image to that number of pixels in height
                while preserving the aspect ratio.
            width: Optional value for the width of the saved images. Specifying both the width
                and height will resize images to an exact size, regardless of aspect ratio.
                Specifying only width will rescale the image to that number of pixels wide
                while preserving the aspect ratio.
            interpolation: Type of interpolation to use when resizing images.
        """
        self._num_images = num_images
        self._frame_margin = frame_margin
        self._image_extension = image_extension
        self._encoder_param = encoder_param
        self._image_name_template = image_name_template
        self._scale = scale
        self._height = height
        self._width = width
        self._interpolation = interpolation

    def run(
        self,
        video: VideoStream,
        scene_list: SceneList,
        output_dir: ty.Optional[str] = None,
        show_progress=False,
    ) -> ty.Dict[int, ty.List[str]]:
        """Extract and save images for every scene in `scene_list`.

        Arguments:
            video: Video to read frames from. It is reset before extraction starts.
            scene_list: Scenes to save images for.
            output_dir: Directory passed to `get_and_create_path` when resolving where each
                image is written.
            show_progress: If True, display a progress bar counting saved images.

        Returns:
            Dictionary mapping each scene's zero-based index to the list of image file names
            (expanded template plus extension) queued for that scene. Empty if `scene_list`
            is empty.

        Raises:
            ValueError: If `num_images` is less than 1 or `frame_margin` is negative.
            Exception: Any exception raised by the encode or save worker thread is re-raised
                here once both workers have shut down.
        """
        if not scene_list:
            return {}
        if self._num_images <= 0 or self._frame_margin < 0:
            raise ValueError("num_images must be at least 1 and frame_margin must not be negative")

        video.reset()

        # Setup flags and init progress bar if available.
        completed = True
        logger.info(
            f"Saving {self._num_images} images per scene [format={self._image_extension}] {output_dir if output_dir else ''} "
        )
        progress_bar = None
        if show_progress:
            progress_bar = tqdm(
                total=len(scene_list) * self._num_images, unit="images", dynamic_ncols=True
            )

        filename_template = Template(self._image_name_template)
        # Count digits via str() rather than floor(log10(n)): floating point rounding makes
        # the latter one digit short for some exact powers of ten (e.g. 1000).
        scene_num_format = "%0" + str(max(3, len(str(len(scene_list))))) + "d"
        image_num_format = "%0" + str(len(str(self._num_images)) + 1) + "d"

        timecode_list = self.generate_timecode_list(scene_list)
        image_filenames = {i: [] for i in range(len(timecode_list))}
        logger.debug("Writing images with template %s", filename_template.template)

        MAX_QUEUED_ENCODE_FRAMES = 4
        MAX_QUEUED_SAVE_IMAGES = 4
        encode_queue = queue.Queue(MAX_QUEUED_ENCODE_FRAMES)
        save_queue = queue.Queue(MAX_QUEUED_SAVE_IMAGES)
        # Unbounded, so a failing worker can always report its error without blocking.
        error_queue = queue.Queue()
        encode_thread = threading.Thread(
            target=self._image_encode_thread,
            args=(video, encode_queue, save_queue, self._image_extension, error_queue),
            daemon=True,
        )
        save_thread = threading.Thread(
            target=self._save_files_thread,
            args=(save_queue, progress_bar, error_queue),
            daemon=True,
        )
        encode_thread.start()
        save_thread.start()

        try:
            for i, scene_timecodes in enumerate(timecode_list):
                # Stop producing new work as soon as a worker has reported a failure.
                if not error_queue.empty():
                    break
                for j, image_timecode in enumerate(scene_timecodes):
                    video.seek(image_timecode)
                    frame_im = video.read()
                    if frame_im is not None and frame_im is not False:
                        # TODO: Add extension to template.
                        # TODO: Allow NUM to be a valid suffix in addition to NUMBER.
                        file_path = "%s.%s" % (
                            filename_template.safe_substitute(
                                VIDEO_NAME=video.name,
                                SCENE_NUMBER=scene_num_format % (i + 1),
                                IMAGE_NUMBER=image_num_format % (j + 1),
                                FRAME_NUMBER=image_timecode.get_frames(),
                                TIMESTAMP_MS=int(image_timecode.get_seconds() * 1000),
                                TIMECODE=image_timecode.get_timecode().replace(":", ";"),
                            ),
                            self._image_extension,
                        )
                        image_filenames[i].append(file_path)
                        encode_queue.put((frame_im, get_and_create_path(file_path, output_dir)))
                    else:
                        completed = False
                        break
        finally:
            # Shut the pipeline down stage by stage. The encoder must have finished before
            # the save queue receives its sentinel, otherwise the save thread could exit
            # while encoded images are still on their way to it.
            encode_queue.put((None, None))
            encode_thread.join()
            save_queue.put((None, None))
            save_thread.join()
            if progress_bar is not None:
                progress_bar.close()

        if not error_queue.empty():
            raise error_queue.get_nowait()
        if not completed:
            logger.error("Could not generate all output images.")

        return image_filenames

    @staticmethod
    def _discard_until_sentinel(work_queue: queue.Queue):
        """Consume and drop items from `work_queue` until the `(None, None)` sentinel arrives.

        Used by a worker after it has failed, so that the thread feeding its bounded queue
        never blocks forever on `put`."""
        while work_queue.get()[0] is not None:
            pass

    def _image_encode_thread(
        self,
        video: VideoStream,
        encode_queue: queue.Queue,
        save_queue: queue.Queue,
        image_extension: str,
        error_queue: ty.Optional[queue.Queue] = None,
    ):
        """Worker: resize and encode frames from `encode_queue`, forwarding to `save_queue`.

        Items are `(frame, dest_path)` tuples; a frame of None is the shutdown sentinel.

        Arguments:
            video: Source video, used only to look up the pixel aspect ratio.
            encode_queue: Queue of `(frame, dest_path)` items to encode.
            save_queue: Queue receiving `(encoded, dest_path)` items.
            image_extension: Extension (without the dot) selecting the image encoder.
            error_queue: If set, any exception is put here instead of propagating, and the
                remaining input is discarded until the sentinel is received.
        """
        try:
            aspect_ratio = video.aspect_ratio
            # Treat nearly-square pixels as square so no extra resize pass is needed.
            if abs(aspect_ratio - 1.0) < 0.01:
                aspect_ratio = None
            # TODO: Validate that encoder_param is within the proper range.
            # Should be between 0 and 100 (inclusive) for jpg/webp, and 1-9 for png.
            imwrite_param = (
                [get_cv2_imwrite_params()[self._image_extension], self._encoder_param]
                if self._encoder_param is not None
                else []
            )
            while True:
                frame_im, dest_path = encode_queue.get()
                if frame_im is None:
                    return
                frame_im = self.resize_image(
                    frame_im,
                    aspect_ratio,
                )
                (is_ok, encoded) = cv2.imencode(f".{image_extension}", frame_im, imwrite_param)
                if not is_ok:
                    logger.error("Failed to encode image: %s", dest_path)
                    continue
                save_queue.put((encoded, dest_path))
        except Exception as ex:
            if error_queue is None:
                raise
            error_queue.put(ex)
            # The failing item was not the sentinel, so keep consuming until it shows up.
            self._discard_until_sentinel(encode_queue)

    def _save_files_thread(
        self,
        save_queue: queue.Queue,
        progress_bar: tqdm,
        error_queue: ty.Optional[queue.Queue] = None,
    ):
        """Worker: write encoded images from `save_queue` to disk.

        Items are `(encoded, dest_path)` tuples; an `encoded` value of None is the shutdown
        sentinel.

        Arguments:
            save_queue: Queue of `(encoded, dest_path)` items to write.
            progress_bar: Progress bar advanced by one per item, or None.
            error_queue: If set, any exception is put here instead of propagating, and the
                remaining input is discarded until the sentinel is received.
        """
        try:
            while True:
                encoded, dest_path = save_queue.get()
                if encoded is None:
                    return
                if encoded is not False:
                    encoded.tofile(Path(dest_path))
                if progress_bar is not None:
                    progress_bar.update(1)
        except Exception as ex:
            if error_queue is None:
                raise
            error_queue.put(ex)
            # The failing item was not the sentinel, so keep consuming until it shows up.
            self._discard_until_sentinel(save_queue)

    def generate_timecode_list(self, scene_list: SceneList) -> ty.List[ty.Iterable[FrameTimecode]]:
        """Generates a list of timecodes for each scene in `scene_list` based on the current config
        parameters.

        Each scene's frame range is split into `num_images` contiguous chunks, and one frame is
        picked from every chunk. The framerate of the first scene's start timecode is used for
        all generated timecodes.

        Returns:
            One iterable of timecodes per scene, in scene order. Empty if `scene_list` is empty.
        """
        if not scene_list:
            return []
        framerate = scene_list[0][0].framerate
        timecode_list = []
        for start, end in scene_list:
            first_frame = start.get_frames()
            # Guard against zero length scenes by always covering at least one frame.
            frames = range(first_frame, first_frame + max(1, end.get_frames() - first_frame))
            if len(frames) < self._num_images:
                # Scene is shorter than the number of images: repeat the last frame so that
                # every chunk below holds at least one frame.
                frames = list(frames) + [frames[-1]] * (self._num_images - len(frames))
            chunks = np.array_split(frames, self._num_images)
            picked = [self._pick_frame(j, chunk) for j, chunk in enumerate(chunks)]
            timecode_list.append(FrameTimecode(int(f), fps=framerate) for f in picked)
        return timecode_list

    def _pick_frame(self, index: int, chunk):
        """Pick the frame to save from the `index`-th chunk of a scene's frame numbers."""
        if (0 < index < self._num_images - 1) or self._num_images == 1:
            # Middle images (or the only image): use the chunk's middle frame.
            return chunk[len(chunk) // 2]
        if index == 0:
            # First image: move into the scene by the margin, staying inside the chunk.
            return min(chunk[0] + self._frame_margin, chunk[-1])
        # Last image: back off from the scene end by the margin, staying inside the chunk.
        return max(chunk[-1] - self._frame_margin, chunk[0])

    def resize_image(
        self,
        image: cv2.Mat,
        aspect_ratio: ty.Optional[float],
    ) -> cv2.Mat:
        """Resizes the given `image` according to the current config parameters. `aspect_ratio` is
        used to correct for non-square pixels.

        Arguments:
            image: Image to resize.
            aspect_ratio: Horizontal stretch factor applied first, or None to skip it.

        Returns:
            The resized image, or the image unchanged if no resizing is configured.
        """
        # TODO: Combine this resize with the ones below.
        if aspect_ratio is not None:
            image = cv2.resize(
                image, (0, 0), fx=aspect_ratio, fy=1.0, interpolation=self._interpolation.value
            )
        image_height = image.shape[0]
        image_width = image.shape[1]
        # Start from the configured target size; a missing dimension is derived below.
        height = self._height
        width = self._width
        # Figure out what kind of resizing needs to be done
        if height or width:
            if height and not width:
                factor = height / float(image_height)
                width = int(factor * image_width)
            elif width and not height:
                factor = width / float(image_width)
                height = int(factor * image_height)
            assert height > 0 and width > 0
            image = cv2.resize(image, (width, height), interpolation=self._interpolation.value)
        elif self._scale:
            image = cv2.resize(
                image,
                (0, 0),
                fx=self._scale,
                fy=self._scale,
                interpolation=self._interpolation.value,
            )
        return image
4 changes: 1 addition & 3 deletions scenedetect/_cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@
write_scene_list,
write_scene_list_html,
)
from scenedetect.scene_manager import (
save_images_mt as save_images_impl,
)
from scenedetect.scene_manager import save_images as save_images_impl
from scenedetect.video_splitter import split_video_ffmpeg, split_video_mkvmerge

logger = logging.getLogger("pyscenedetect")
Expand Down
Loading

0 comments on commit 55dec7f

Please sign in to comment.