Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Color Histogram Detector #295

Merged
merged 18 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions scenedetect/_cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,52 @@ def detect_threshold_command(
ctx.obj.add_detector(ThresholdDetector(**detector_args))


@click.command('detect-hist', cls=_Command)
@click.option(
    '--threshold',
    '-t',
    metavar='VAL',
    type=click.FloatRange(CONFIG_MAP['detect-hist']['threshold'].min_val,
                          CONFIG_MAP['detect-hist']['threshold'].max_val),
    default=None,
    help='Threshold value (float) that the rgb histogram difference must exceed to trigger'
    ' a new scene. Refer to frame metric hist_diff in stats file.%s' %
    (USER_CONFIG.get_help_string('detect-hist', 'threshold')))
@click.option(
    '--bits',
    '-b',
    metavar='NUM',
    # Each colour channel is 8 bits wide, so only 1..8 most significant bits can be kept.
    type=click.IntRange(1, 8),
    default=None,
    help='The number of most significant bits to keep when quantizing the RGB color channels.%s'
    % (USER_CONFIG.get_help_string("detect-hist", "bits")))
@click.option(
    '--min-scene-len',
    '-m',
    metavar='TIMECODE',
    type=click.STRING,
    default=None,
    help='Minimum length of any scene. Overrides global min-scene-len (-m) setting.'
    ' TIMECODE can be specified as exact number of frames, a time in seconds followed by s,'
    ' or a timecode in the format HH:MM:SS or HH:MM:SS.nnn.%s' %
    ('' if USER_CONFIG.is_default('detect-hist', 'min-scene-len') else USER_CONFIG.get_help_string(
        'detect-hist', 'min-scene-len')))
@click.pass_context
def detect_hist_command(ctx: click.Context, threshold: Optional[float], bits: Optional[int],
                        min_scene_len: Optional[str]):
    """Perform detection of scenes by comparing differences in the RGB histograms of adjacent
    frames.

    Examples:

        detect-hist

        detect-hist --threshold 20000.0
    """
    # NOTE: the docstring above doubles as the command's help text; the blank lines keep each
    # example in its own paragraph when the help output is re-wrapped.
    assert isinstance(ctx.obj, CliContext)
    # Options left as None are resolved by the context handler (config file / defaults).
    ctx.obj.handle_detect_hist(threshold=threshold, bits=bits, min_scene_len=min_scene_len)


@click.command('load-scenes', cls=_Command)
@click.option(
'--input',
Expand Down
5 changes: 5 additions & 0 deletions scenedetect/_cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,11 @@ def format(self, timecode: FrameTimecode) -> str:
'min-scene-len': TimecodeValue(0),
'threshold': RangeValue(12.0, min_val=0.0, max_val=255.0),
},
'detect-hist': {
'bits': 4,
'min-scene-len': TimecodeValue(0),
'threshold': RangeValue(20000.0, min_val=0.0, max_val=10000000000.0),
},
'load-scenes': {
'start-col-name': 'Start Frame',
},
Expand Down
31 changes: 31 additions & 0 deletions scenedetect/_cli/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,37 @@ def handle_load_scenes(self, input: AnyStr, start_col_name: Optional[str]):
self.load_scenes_column_name = self.config.get_value("load-scenes", "start-col-name",
start_col_name)

def handle_detect_hist(self, threshold: Optional[float], bits: Optional[int],
                       min_scene_len: Optional[str]):
    """Handle `detect-hist` command options.

    Resolves each option against the configuration, then registers a `HistogramDetector`
    built from the resolved values.

    Arguments:
        threshold: Value given on the command line, or None to defer to the config lookup.
        bits: Value given on the command line, or None to defer to the config lookup.
        min_scene_len: Timecode given on the command line, or None to use the config value
            (or the global minimum scene length when the config entry is at its default).
    """
    self._check_input_open()
    # Clear the flag while this command is handled; the previous value is put back at the end.
    saved_options_processed = self.options_processed
    self.options_processed = False

    if self.drop_short_scenes:
        min_scene_len = 0
    else:
        if min_scene_len is None:
            uses_default = self.config.is_default("detect-hist", "min-scene-len")
            min_scene_len = (
                self.min_scene_len.frame_num
                if uses_default else self.config.get_value("detect-hist", "min-scene-len"))
        # Normalize whatever was selected above into a frame count.
        frame_rate = self.video_stream.frame_rate
        min_scene_len = parse_timecode(min_scene_len, frame_rate).frame_num

    threshold = self.config.get_value("detect-hist", "threshold", threshold)
    bits = self.config.get_value("detect-hist", "bits", bits)

    # Record the final arguments before the detector is constructed, to aid debugging.
    logger.debug(
        'Adding detector: HistogramDetector(threshold=%f, bits=%d,'
        ' min_scene_len=%d)', threshold, bits, min_scene_len)

    detector = scenedetect.detectors.HistogramDetector(
        threshold=threshold, bits=bits, min_scene_len=min_scene_len)
    self._add_detector(detector)

    self.options_processed = saved_options_processed

def handle_export_html(
self,
filename: Optional[AnyStr],
Expand Down
14 changes: 1 addition & 13 deletions scenedetect/detectors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from scenedetect.detectors.content_detector import ContentDetector
from scenedetect.detectors.threshold_detector import ThresholdDetector
from scenedetect.detectors.adaptive_detector import AdaptiveDetector
from scenedetect.detectors.histogram_detector import HistogramDetector

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# #
Expand All @@ -51,19 +52,6 @@
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# class HistogramDetector(SceneDetector):
# """Detects fast cuts via histogram changes between sequential frames.
#
# Detects fast cuts between content (using histogram deltas, much like the
# ContentDetector uses HSV colourspace deltas), as well as both fades and
# cuts to/from black (using a threshold, much like the ThresholdDetector).
# """
#
# def __init__(self):
# super(DissolveDetector, self).__init__()
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# class MotionDetector(SceneDetector):
# """Detects motion events in scenes containing a static background.
#
Expand Down
189 changes: 189 additions & 0 deletions scenedetect/detectors/histogram_detector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
# -*- coding: utf-8 -*-
#
# PySceneDetect: Python-Based Video Scene Detector
# ---------------------------------------------------------------
# [ Site: http://www.scenedetect.scenedetect.com/ ]
# [ Docs: http://manual.scenedetect.scenedetect.com/ ]
# [ Github: https://github.com/Breakthrough/PySceneDetect/ ]
#
# Copyright (C) 2014-2022 Brandon Castellano <http://www.bcastell.com>.
# PySceneDetect is licensed under the BSD 3-Clause License; see the
# included LICENSE file, or visit one of the above pages for details.
#
""":py:class:`HistogramDetector` compares the difference in the RGB histograms of subsequent
frames. If the difference exceeds a given threshold, a cut is detected.
This detector is available from the command-line as the `detect-hist` command.
"""

from typing import List

import numpy

# PySceneDetect Library Imports
from scenedetect.scene_detector import SceneDetector


class HistogramDetector(SceneDetector):
    """Compares the difference in the RGB histograms of subsequent
    frames. If the difference exceeds a given threshold, a cut is detected.

    Each frame is reduced to one integer colour code per pixel (the `bits` most significant
    bits of each of the three channels packed side by side). The per-frame histogram of those
    codes is compared with the previous frame's histogram using the sum of absolute bin
    differences, which is reported as the `hist_diff` metric.
    """

    METRIC_KEYS = ['hist_diff']

    def __init__(self, threshold: float = 20000.0, bits: int = 4, min_scene_len: int = 15):
        """
        Arguments:
            threshold: Threshold value (float) that the calculated difference between subsequent
                histograms must exceed to trigger a new scene.
            bits: Number of most significant bits to keep of the pixel values. Most videos and
                images are 8-bit rgb (0-255) and the default is to just keep the 4 most
                significant bits. This compresses the 3*8bit (24bit) image down to 3*4bits
                (12bits). This makes quantizing the rgb histogram a bit easier and comparisons
                more meaningful. Must be between 1 and 8.
            min_scene_len: Minimum length of any scene.

        Raises:
            ValueError: If `bits` is outside the range 1 to 8.
        """
        super().__init__()
        # Channels are 8 bits wide; anything outside 1..8 cannot be quantized meaningfully.
        if not 1 <= bits <= 8:
            raise ValueError('bits must be between 1 and 8 for HistogramDetector')
        self.threshold = threshold
        self.bits = bits
        self.min_scene_len = min_scene_len
        # numpy.histogram interprets a sequence as bin *edges*, and its last bin is closed on
        # both sides. N + 1 edges are therefore required to give each of the N = 2**(3 * bits)
        # colour codes a bin of its own; with only N edges the two highest codes share a bin.
        self._hist_bins = range(2**(3 * self.bits) + 1)
        self._last_hist = None
        self._last_scene_cut = None

    def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[int]:
        """First, compress the image according to the self.bits value, then build a histogram for
        the input frame. Afterward, compare against the previously analyzed frame and check if the
        difference is large enough to trigger a cut.

        Arguments:
            frame_num: Frame number of frame that is being passed.
            frame_img: Decoded frame image (numpy.ndarray) to perform scene
                detection on.

        Returns:
            List of frames where scene cuts have been detected. There may be 0
            or more frames in the list, and not necessarily the same as frame_num.

        Raises:
            ValueError: If the frame is not a uint8 array with exactly three color channels.
        """
        cut_list = []

        if frame_img.dtype != numpy.uint8:
            raise ValueError('Image must be 8-bit rgb for HistogramDetector')

        # Check the rank first so a 2-D (single channel) frame raises the intended ValueError
        # rather than an IndexError from shape[2].
        if frame_img.ndim != 3 or frame_img.shape[2] != 3:
            raise ValueError('Image must have three color channels for HistogramDetector')

        # Initialize last scene cut point at the beginning of the frames of interest.
        # Compare against None explicitly: frame number 0 is a valid starting point.
        if self._last_scene_cut is None:
            self._last_scene_cut = frame_num

        # Quantize the image and separate the color channels
        quantized_imgs = self._quantize_frame(frame_img=frame_img, bits=self.bits)

        # Perform bit shifting operations and bitwise combine color channels into one array
        composite_img = self._shift_bits(quantized_imgs=quantized_imgs, bits=self.bits)

        # Create the histogram with a bin for every rgb value
        hist, _ = numpy.histogram(composite_img, bins=self._hist_bins)

        # We can only start detecting once we have a frame to compare with.
        if self._last_hist is not None:
            # Compute histogram difference between frames (sum of absolute bin differences)
            hist_diff = numpy.sum(numpy.fabs(self._last_hist - hist))

            # Check if a new scene should be triggered
            scene_long_enough = (frame_num - self._last_scene_cut) >= self.min_scene_len
            if hist_diff >= self.threshold and scene_long_enough:
                cut_list.append(frame_num)
                self._last_scene_cut = frame_num

            # Save stats to a StatsManager if it is being used
            if self.stats_manager is not None:
                self.stats_manager.set_metrics(frame_num, {self.METRIC_KEYS[0]: hist_diff})

        self._last_hist = hist

        return cut_list

    def _quantize_frame(self, frame_img, bits):
        """Quantizes the image based on the number of most significant bits to be preserved.

        Arguments:
            frame_img: The 8-bit rgb image of the frame being analyzed.
            bits: The number of most significant bits to keep during quantization.

        Returns:
            [red_img, green_img, blue_img]:
                The three separated color channels of the frame image that have been quantized.
                The kept bits remain in their original (high) positions within each byte.
        """
        # 8-bit mask with the `bits` highest bits set, e.g. bits=4 -> 0b11110000.
        bit_value = 0xFF & ~(0xFF >> bits)

        # Separate the three color channels and cast to int for easier bitwise operations.
        # NOTE(review): channels are named assuming R, G, B order. If frames arrive in another
        # order (e.g. BGR), only the names are off; every channel is still represented.
        red_img = frame_img[:, :, 0].astype(int)
        green_img = frame_img[:, :, 1].astype(int)
        blue_img = frame_img[:, :, 2].astype(int)

        # Quantize the frame images
        red_img = red_img & bit_value
        green_img = green_img & bit_value
        blue_img = blue_img & bit_value

        return [red_img, green_img, blue_img]

    def _shift_bits(self, quantized_imgs, bits):
        """Takes care of the bit shifting operations to combine the RGB color
        channels into a single array.

        The resulting value for each pixel has the blue bits in positions [0, bits), the green
        bits in [bits, 2 * bits) and the red bits in [2 * bits, 3 * bits).

        Arguments:
            quantized_imgs: A list of the three quantized images of the RGB color channels
                respectively.
            bits: The number of most significant bits to use for quantizing the image.

        Returns:
            composite_img: The resulting array after all bitwise operations.
        """
        # First, figure out how much each shift needs to be. The kept bits start at bit
        # position (8 - bits); a positive value means "move right", a negative one "move left".
        blue_shift = 8 - bits
        green_shift = 8 - 2 * bits
        red_shift = 8 - 3 * bits

        # Separate our color channels for ease
        red_img = quantized_imgs[0]
        green_img = quantized_imgs[1]
        blue_img = quantized_imgs[2]

        # Perform the bit shifting for each color
        red_img = self._shift_images(img=red_img, img_shift=red_shift)
        green_img = self._shift_images(img=green_img, img_shift=green_shift)
        blue_img = self._shift_images(img=blue_img, img_shift=blue_shift)

        # Join our rgb arrays together
        composite_img = numpy.bitwise_or(red_img, numpy.bitwise_or(green_img, blue_img))

        return composite_img

    def _shift_images(self, img, img_shift):
        """Do bitwise shifting operations for a color channel image checking for shift direction.

        Arguments:
            img: A quantized image of a single color channel
            img_shift: How many bits to shift the values of img. A positive value shifts to the
                right; a negative value shifts to the left by its magnitude.

        Returns:
            shifted_img: The bitwise shifted image.
        """
        if img_shift < 0:
            # Shift left by the magnitude. (Adding 8 instead only happens to give the same
            # result when the shift is exactly -4, and misplaces channels otherwise.)
            shifted_img = numpy.left_shift(img, -img_shift)
        else:
            shifted_img = numpy.right_shift(img, img_shift)

        return shifted_img

    def is_processing_required(self, frame_num: int) -> bool:
        """Always returns True: every frame is processed by this detector."""
        return True

    def get_metrics(self) -> List[str]:
        """Returns the list of frame metric keys this detector records (`hist_diff`)."""
        return HistogramDetector.METRIC_KEYS
2 changes: 1 addition & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
# TODO: Missing tests for --min-scene-len and --drop-short-scenes.

SCENEDETECT_CMD = 'python -m scenedetect'
ALL_DETECTORS = ['detect-content', 'detect-threshold', 'detect-adaptive']
ALL_DETECTORS = ['detect-content', 'detect-threshold', 'detect-adaptive', 'detect-hist']
ALL_BACKENDS = ['opencv', 'pyav']

DEFAULT_VIDEO_PATH = 'tests/resources/goldeneye.mp4'
Expand Down
29 changes: 27 additions & 2 deletions tests/test_detectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import pytest

from scenedetect import detect, SceneManager, FrameTimecode, StatsManager, SceneDetector
from scenedetect.detectors import AdaptiveDetector, ContentDetector, ThresholdDetector
from scenedetect.detectors import AdaptiveDetector, ContentDetector, ThresholdDetector, HistogramDetector
from scenedetect.backends.opencv import VideoStreamCv2


Expand All @@ -47,6 +47,31 @@ def get_absolute_path(relative_path: str) -> str:
return abs_path


# TODO: Add a test case for this in the fixtures defined below.
def test_histogram_detector(test_movie_clip):
    """Check HistogramDetector (default arguments) via SceneManager on a VideoStreamCv2 input.

    Detection runs on the window 00:00:50 to 00:01:19 and the first frame of every detected
    scene must match the expected list exactly.
    """
    # Ground truth of start frame for each fast cut in `test_movie_clip`.
    expected_start_frames = [1199, 1226, 1260, 1281, 1334, 1365, 1590, 1697, 1871]

    stream = VideoStreamCv2(test_movie_clip)
    manager = SceneManager()
    manager.add_detector(HistogramDetector())
    manager.auto_downscale = True

    fps = stream.frame_rate
    window_start = FrameTimecode('00:00:50', fps)
    window_end = FrameTimecode('00:01:19', fps)

    stream.seek(window_start)
    manager.detect_scenes(video=stream, end_time=window_end)

    scenes = manager.get_scene_list()
    assert len(scenes) == len(expected_start_frames)
    actual_start_frames = [scene_start.get_frames() for scene_start, _ in scenes]
    assert expected_start_frames == actual_start_frames
    # The final scene has to end exactly where detection was told to stop.
    assert scenes[-1][1] == window_end


@dataclass
class TestCase:
"""Properties for detector test cases."""
Expand Down Expand Up @@ -177,7 +202,7 @@ def test_detect_fades(test_case: TestCase):
def test_detectors_with_stats(test_video_file):
""" Test all detectors functionality with a StatsManager. """
# TODO(v1.0): Parameterize this test case (move fixture from cli to test config).
for detector in [ContentDetector, ThresholdDetector, AdaptiveDetector]:
for detector in [ContentDetector, ThresholdDetector, AdaptiveDetector, HistogramDetector]:
video = VideoStreamCv2(test_video_file)
stats = StatsManager()
scene_manager = SceneManager(stats_manager=stats)
Expand Down
4 changes: 4 additions & 0 deletions website/pages/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ The adaptive content detector (`detect-adaptive`) compares the difference in con

The threshold-based scene detector (`detect-threshold`) is how most traditional scene detection methods work (e.g. the `ffmpeg blackframe` filter), by comparing the intensity/brightness of the current frame with a set threshold, and triggering a scene cut/break when this value crosses the threshold. In PySceneDetect, this value is computed by averaging the R, G, and B values for every pixel in the frame, yielding a single floating point number representing the average pixel value (from 0.0 to 255.0).

## Histogram Detector

The color histogram detector (`detect-hist`) uses color information to detect fast cuts. The input video for this detector must be in 8-bit color. The detection algorithm separates the three RGB color channels and quantizes each one by discarding all but the given number of most significant bits (`--bits/-b`). The quantized color channels are then bit-shifted and joined together into a new, composite image. A histogram is constructed from the pixel values of this composite image and compared element-wise with the histogram from the previous frame. If the total difference between the two adjacent histograms exceeds the given threshold (`--threshold/-t`), a new scene is triggered.

# Creating New Detection Algorithms

All scene detection algorithms must inherit from [the base `SceneDetector` class](https://scenedetect.com/projects/Manual/en/latest/api/scene_detector.html). Note that the current SceneDetector API is under development and expected to change somewhat before v1.0 is released, so make sure to pin your `scenedetect` dependency to the correct API version (e.g. `scenedetect < 0.6`, `scenedetect < 0.7`, etc...).
Expand Down
Loading