Skip to content

Commit

Permalink
Color Histogram Detector (#295)
Browse files Browse the repository at this point in the history
* Initial implementation of HistogramDetector.

* Added check for color channels

* Added tests for detect-hist.

* Added documentation for detect-hist.

* Add detect-hist to test_cli

* Fix formatting

* Fix test_histogram_detector

* Move detect-hist to new location.

* Delete scenedetect/cli/__init__.py

Moved to scenedetect/_cli/__init__.py

* Add config options for detect-hist

* Update config.py

* Update __init__.py

* Update config.py

---------

Co-authored-by: Brandon Castellano <[email protected]>
  • Loading branch information
wjs018 and Breakthrough authored Apr 17, 2024
1 parent cc6b863 commit 2450144
Show file tree
Hide file tree
Showing 8 changed files with 304 additions and 16 deletions.
46 changes: 46 additions & 0 deletions scenedetect/_cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,52 @@ def detect_threshold_command(
ctx.obj.add_detector(ThresholdDetector(**detector_args))


# NOTE: The function docstring below is rendered by click as the command's help text, so it is
# kept short and user-facing. Option values left as None are resolved later by the CLI context
# handler (`handle_detect_hist`), which is where config/default fallback happens.
@click.command('detect-hist', cls=_Command)
@click.option(
    '--threshold',
    '-t',
    metavar='VAL',
    type=click.FloatRange(CONFIG_MAP['detect-hist']['threshold'].min_val,
                          CONFIG_MAP['detect-hist']['threshold'].max_val),
    default=None,
    help='Threshold value (float) that the rgb histogram difference must exceed to trigger'
    ' a new scene. Refer to frame metric hist_diff in stats file.%s' %
    (USER_CONFIG.get_help_string('detect-hist', 'threshold')))
@click.option(
    '--bits',
    '-b',
    metavar='NUM',
    # The detector only accepts 8-bit frames, so between 1 and 8 bits can be kept per channel.
    # Rejecting other values here gives a clear usage error instead of a failure mid-run.
    type=click.IntRange(1, 8),
    default=None,
    help='The number of most significant bits to keep when quantizing the RGB color channels.%s' %
    (USER_CONFIG.get_help_string('detect-hist', 'bits')))
@click.option(
    '--min-scene-len',
    '-m',
    metavar='TIMECODE',
    type=click.STRING,
    default=None,
    help='Minimum length of any scene. Overrides global min-scene-len (-m) setting.'
    ' TIMECODE can be specified as exact number of frames, a time in seconds followed by s,'
    ' or a timecode in the format HH:MM:SS or HH:MM:SS.nnn.%s' %
    ('' if USER_CONFIG.is_default('detect-hist', 'min-scene-len') else USER_CONFIG.get_help_string(
        'detect-hist', 'min-scene-len')))
@click.pass_context
def detect_hist_command(ctx: click.Context, threshold: Optional[float], bits: Optional[int],
                        min_scene_len: Optional[str]):
    """Perform detection of scenes by comparing differences in the RGB histograms of adjacent
    frames.

    Examples:

        detect-hist

        detect-hist --threshold 20000.0
    """
    assert isinstance(ctx.obj, CliContext)
    # All option resolution and detector construction is delegated to the CLI context.
    ctx.obj.handle_detect_hist(threshold=threshold, bits=bits, min_scene_len=min_scene_len)


@click.command('load-scenes', cls=_Command)
@click.option(
'--input',
Expand Down
5 changes: 5 additions & 0 deletions scenedetect/_cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,11 @@ def format(self, timecode: FrameTimecode) -> str:
'min-scene-len': TimecodeValue(0),
'threshold': RangeValue(12.0, min_val=0.0, max_val=255.0),
},
'detect-hist': {
'bits': 4,
'min-scene-len': TimecodeValue(0),
'threshold': RangeValue(20000.0, min_val=0.0, max_val=10000000000.0),
},
'load-scenes': {
'start-col-name': 'Start Frame',
},
Expand Down
31 changes: 31 additions & 0 deletions scenedetect/_cli/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,37 @@ def handle_load_scenes(self, input: AnyStr, start_col_name: Optional[str]):
self.load_scenes_column_name = self.config.get_value("load-scenes", "start-col-name",
start_col_name)

def handle_detect_hist(self, threshold: Optional[float], bits: Optional[int],
                       min_scene_len: Optional[str]):
    """Handle `detect-hist` command options.

    Resolves the minimum scene length, threshold and bit depth, logs the resulting arguments,
    and registers a `HistogramDetector` built from them.

    Arguments:
        threshold: Value given on the command line, or None if the option was omitted.
        bits: Value given on the command line, or None if the option was omitted.
        min_scene_len: Timecode string given on the command line, or None if omitted.
    """
    self._check_input_open()
    # The flag is cleared for the duration of this handler and put back just before returning.
    saved_options_processed = self.options_processed
    self.options_processed = False

    if self.drop_short_scenes:
        min_scene_len = 0
    else:
        if min_scene_len is None:
            # With no command-specific value configured, fall back to the global setting.
            uses_global_setting = self.config.is_default("detect-hist", "min-scene-len")
            min_scene_len = (
                self.min_scene_len.frame_num if uses_global_setting else self.config.get_value(
                    "detect-hist", "min-scene-len"))
        min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num

    # NOTE(review): presumably `get_value` prefers the passed-in value over the config/default
    # when it is not None - confirm against the config implementation.
    detector_args = {
        'threshold': self.config.get_value("detect-hist", "threshold", threshold),
        'bits': self.config.get_value("detect-hist", "bits", bits),
        'min_scene_len': min_scene_len,
    }

    # Record the final arguments before the detector is constructed, to aid debugging.
    logger.debug(
        'Adding detector: HistogramDetector(threshold=%f, bits=%d,'
        ' min_scene_len=%d)', detector_args['threshold'], detector_args['bits'],
        detector_args['min_scene_len'])

    self._add_detector(scenedetect.detectors.HistogramDetector(**detector_args))

    self.options_processed = saved_options_processed

def handle_export_html(
self,
filename: Optional[AnyStr],
Expand Down
14 changes: 1 addition & 13 deletions scenedetect/detectors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from scenedetect.detectors.content_detector import ContentDetector
from scenedetect.detectors.threshold_detector import ThresholdDetector
from scenedetect.detectors.adaptive_detector import AdaptiveDetector
from scenedetect.detectors.histogram_detector import HistogramDetector

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# #
Expand All @@ -51,19 +52,6 @@
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# class HistogramDetector(SceneDetector):
# """Detects fast cuts via histogram changes between sequential frames.
#
# Detects fast cuts between content (using histogram deltas, much like the
# ContentDetector uses HSV colourspace deltas), as well as both fades and
# cuts to/from black (using a threshold, much like the ThresholdDetector).
# """
#
# def __init__(self):
# super(DissolveDetector, self).__init__()
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# class MotionDetector(SceneDetector):
# """Detects motion events in scenes containing a static background.
#
Expand Down
189 changes: 189 additions & 0 deletions scenedetect/detectors/histogram_detector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
# -*- coding: utf-8 -*-
#
# PySceneDetect: Python-Based Video Scene Detector
# ---------------------------------------------------------------
# [ Site: https://scenedetect.com/ ]
# [ Docs: https://scenedetect.com/projects/Manual/en/latest/ ]
# [ Github: https://github.com/Breakthrough/PySceneDetect/ ]
#
# Copyright (C) 2014-2022 Brandon Castellano <http://www.bcastell.com>.
# PySceneDetect is licensed under the BSD 3-Clause License; see the
# included LICENSE file, or visit one of the above pages for details.
#
""":py:class:`HistogramDetector` compares the difference in the RGB histograms of subsequent
frames. If the difference exceeds a given threshold, a cut is detected.
This detector is available from the command-line as the `detect-hist` command.
"""

from typing import List

import numpy

# PySceneDetect Library Imports
from scenedetect.scene_detector import SceneDetector


class HistogramDetector(SceneDetector):
    """Compares the difference in the RGB histograms of subsequent frames. If the difference
    reaches a given threshold, a cut is detected.

    Each frame is reduced to a single integer per pixel by keeping the `bits` most significant
    bits of each of its three color channels and packing them side by side. The histogram of
    those packed values is compared against the previous frame's histogram using the sum of
    absolute per-bin differences (reported as the `hist_diff` metric).
    """

    METRIC_KEYS = ['hist_diff']

    def __init__(self, threshold: float = 20000.0, bits: int = 4, min_scene_len: int = 15):
        """
        Arguments:
            threshold: Value that the calculated difference between subsequent histograms must
                reach or exceed to trigger a new scene. The difference is a sum of pixel counts,
                so it scales with the number of pixels in the frames being processed.
            bits: Number of most significant bits to keep of the pixel values, from 1 to 8.
                Frames must be 8-bit (0-255) and the default is to keep the 4 most significant
                bits. This compresses the 3*8bit (24bit) image down to 3*4bits (12bits), which
                keeps the histogram small and makes comparisons more meaningful.
            min_scene_len: Minimum length of any scene, in frames.

        Raises:
            ValueError: If `bits` is not between 1 and 8 (inclusive).
        """
        super().__init__()
        # Frames are required to be 8-bit, so no other bit count can be quantized meaningfully.
        if not 1 <= bits <= 8:
            raise ValueError('bits must be between 1 and 8 for HistogramDetector')
        self.threshold = threshold
        self.bits = bits
        self.min_scene_len = min_scene_len
        # Bin *edges* for numpy.histogram: N + 1 edges give one unit-wide bin for each of the
        # N = 2**(3 * bits) possible packed values. With only N edges the last bin would be
        # closed on both sides and merge the two highest values. Built once here so the edges
        # are not re-created for every frame.
        self._hist_bins = numpy.arange(2**(3 * self.bits) + 1)
        self._last_hist = None
        self._last_scene_cut = None

    def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[int]:
        """First, compress the image according to the self.bits value, then build a histogram
        for the input frame. Afterward, compare against the previously analyzed frame and check
        if the difference is large enough to trigger a cut.

        Arguments:
            frame_num: Frame number of frame that is being passed.
            frame_img: Decoded frame image (numpy.ndarray) to perform scene detection on. Must
                be a uint8 array with three color channels on its last (third) axis.

        Returns:
            List of frames where scene cuts have been detected. Contains `frame_num` if a cut
            was detected on this frame, and is empty otherwise.

        Raises:
            ValueError: If `frame_img` is not uint8 or does not have three color channels.
        """
        cut_list = []

        if frame_img.dtype != numpy.uint8:
            raise ValueError('Image must be 8-bit rgb for HistogramDetector')

        # Checking the rank first makes a 2-D (single channel) frame raise the intended
        # ValueError rather than an IndexError from indexing shape[2].
        if frame_img.ndim != 3 or frame_img.shape[2] != 3:
            raise ValueError('Image must have three color channels for HistogramDetector')

        # Initialize last scene cut point at the beginning of the frames of interest. This must
        # compare against None: frame number 0 is falsy, and a truthiness test would move the
        # starting point forward to the following frame.
        if self._last_scene_cut is None:
            self._last_scene_cut = frame_num

        # Quantize the image and separate the color channels
        quantized_imgs = self._quantize_frame(frame_img=frame_img, bits=self.bits)

        # Perform bit shifting operations and bitwise combine color channels into one array
        composite_img = self._shift_bits(quantized_imgs=quantized_imgs, bits=self.bits)

        # Create the histogram with a bin for every packed color value
        hist, _ = numpy.histogram(composite_img, bins=self._hist_bins)

        # We can only start detecting once we have a frame to compare with.
        if self._last_hist is not None:
            # Sum of absolute per-bin differences between this frame and the previous one.
            hist_diff = numpy.sum(numpy.fabs(self._last_hist - hist))

            # Check if a new scene should be triggered
            scene_len = frame_num - self._last_scene_cut
            if hist_diff >= self.threshold and scene_len >= self.min_scene_len:
                cut_list.append(frame_num)
                self._last_scene_cut = frame_num

            # Save stats to a StatsManager if it is being used
            if self.stats_manager is not None:
                self.stats_manager.set_metrics(frame_num, {self.METRIC_KEYS[0]: hist_diff})

        self._last_hist = hist

        return cut_list

    def _quantize_frame(self, frame_img, bits):
        """Quantizes the image by keeping only the most significant bits of every channel.

        Arguments:
            frame_img: The 8-bit, three channel image of the frame being analyzed.
            bits: The number of most significant bits to keep during quantization.

        Returns:
            [red_img, green_img, blue_img]:
                The three separated color channels of the frame image that have been quantized.
                The names follow channel indices 0, 1 and 2.
                NOTE(review): the actual color order depends on the decoder (OpenCV frames are
                typically BGR) - confirm. A different order only permutes histogram bins, which
                leaves the frame-to-frame difference unchanged.
        """
        # 8-bit mask with the top `bits` bits set, e.g. bits=4 -> 0b11110000.
        bit_value = (0xFF << (8 - bits)) & 0xFF

        # Separate the channels, cast to int so later shifts have room beyond 8 bits, and mask.
        red_img, green_img, blue_img = (
            frame_img[:, :, channel].astype(int) & bit_value for channel in range(3))

        return [red_img, green_img, blue_img]

    def _shift_bits(self, quantized_imgs, bits):
        """Takes care of the bit shifting operations to combine the RGB color channels into a
        single array.

        After quantization every channel occupies bit positions (8 - bits) to 7. The third
        channel is moved to the lowest `bits` bits, the second directly above it and the first
        above that, giving a value in the range [0, 2**(3 * bits)).

        Arguments:
            quantized_imgs: A list of the three quantized images of the RGB color channels
                respectively.
            bits: The number of most significant bits to use for quantizing the image.

        Returns:
            composite_img: The resulting array after all bitwise operations.
        """
        # Signed shift for each channel: positive shifts right, negative shifts left.
        blue_shift = 8 - bits
        green_shift = 8 - 2 * bits
        red_shift = 8 - 3 * bits

        red_img, green_img, blue_img = quantized_imgs

        # Perform the bit shifting for each color
        red_img = self._shift_images(img=red_img, img_shift=red_shift)
        green_img = self._shift_images(img=green_img, img_shift=green_shift)
        blue_img = self._shift_images(img=blue_img, img_shift=blue_shift)

        # Join our rgb arrays together; the bit fields do not overlap so OR simply packs them.
        composite_img = numpy.bitwise_or(red_img, numpy.bitwise_or(green_img, blue_img))

        return composite_img

    def _shift_images(self, img, img_shift):
        """Do bitwise shifting operations for a color channel image checking for shift direction.

        Arguments:
            img: A quantized image of a single color channel.
            img_shift: How many bits to shift the values of img. Positive values shift to the
                right; negative values shift to the left by the same magnitude.

        Returns:
            shifted_img: The bitwise shifted image.
        """
        if img_shift < 0:
            # Shift left by the magnitude. (Adding 8 instead only happens to give the right
            # amount for a shift of -4, i.e. bits=4, and misplaces the channel otherwise.)
            shifted_img = numpy.left_shift(img, -img_shift)
        else:
            shifted_img = numpy.right_shift(img, img_shift)

        return shifted_img

    def is_processing_required(self, frame_num: int) -> bool:
        """Always returns True, regardless of `frame_num`."""
        return True

    def get_metrics(self) -> List[str]:
        """Returns the list of metric keys this detector reports (`METRIC_KEYS`)."""
        return HistogramDetector.METRIC_KEYS
2 changes: 1 addition & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
# TODO: Missing tests for --min-scene-len and --drop-short-scenes.

SCENEDETECT_CMD = 'python -m scenedetect'
ALL_DETECTORS = ['detect-content', 'detect-threshold', 'detect-adaptive']
ALL_DETECTORS = ['detect-content', 'detect-threshold', 'detect-adaptive', 'detect-hist']
ALL_BACKENDS = ['opencv', 'pyav']

DEFAULT_VIDEO_PATH = 'tests/resources/goldeneye.mp4'
Expand Down
29 changes: 27 additions & 2 deletions tests/test_detectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import pytest

from scenedetect import detect, SceneManager, FrameTimecode, StatsManager, SceneDetector
from scenedetect.detectors import AdaptiveDetector, ContentDetector, ThresholdDetector
from scenedetect.detectors import AdaptiveDetector, ContentDetector, ThresholdDetector, HistogramDetector
from scenedetect.backends.opencv import VideoStreamCv2


Expand All @@ -47,6 +47,31 @@ def get_absolute_path(relative_path: str) -> str:
return abs_path


# TODO: Add a test case for this in the fixtures defined below.
def test_histogram_detector(test_movie_clip):
    """Run HistogramDetector via SceneManager on a VideoStreamCv2 and check the detected cuts."""
    # Ground truth of the start frame for each fast cut in the analyzed part of
    # `test_movie_clip`.
    expected_start_frames = [1199, 1226, 1260, 1281, 1334, 1365, 1590, 1697, 1871]

    stream = VideoStreamCv2(test_movie_clip)
    fps = stream.frame_rate
    segment_start = FrameTimecode('00:00:50', fps)
    segment_end = FrameTimecode('00:01:19', fps)

    manager = SceneManager()
    manager.add_detector(HistogramDetector())
    manager.auto_downscale = True

    # Only the segment between the two timecodes is processed.
    stream.seek(segment_start)
    manager.detect_scenes(video=stream, end_time=segment_end)

    scenes = manager.get_scene_list()
    assert len(scenes) == len(expected_start_frames)
    actual_start_frames = [scene_start.get_frames() for scene_start, _ in scenes]
    assert expected_start_frames == actual_start_frames
    # The final scene must end exactly at the end time that was requested.
    assert scenes[-1][1] == segment_end


@dataclass
class TestCase:
__test__ = False
Expand Down Expand Up @@ -178,7 +203,7 @@ def test_detect_fades(test_case: TestCase):
def test_detectors_with_stats(test_video_file):
""" Test all detectors functionality with a StatsManager. """
# TODO(v1.0): Parameterize this test case (move fixture from cli to test config).
for detector in [ContentDetector, ThresholdDetector, AdaptiveDetector]:
for detector in [ContentDetector, ThresholdDetector, AdaptiveDetector, HistogramDetector]:
video = VideoStreamCv2(test_video_file)
stats = StatsManager()
scene_manager = SceneManager(stats_manager=stats)
Expand Down
4 changes: 4 additions & 0 deletions website/pages/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ The adaptive content detector (`detect-adaptive`) compares the difference in con

The threshold-based scene detector (`detect-threshold`) is how most traditional scene detection methods work (e.g. the `ffmpeg blackframe` filter), by comparing the intensity/brightness of the current frame with a set threshold, and triggering a scene cut/break when this value crosses the threshold. In PySceneDetect, this value is computed by averaging the R, G, and B values for every pixel in the frame, yielding a single floating point number representing the average pixel value (from 0.0 to 255.0).

## Histogram Detector

The color histogram detector (`detect-hist`) uses color information to detect fast cuts. The input video for this detector must be in 8-bit color. The detection algorithm consists of separating the three RGB color channels and then quantizing them by eliminating all but the given number of most significant bits (`--bits/-b`). The resulting quantized color channels are then bit shifted and joined together into a new, composite image. A histogram is then constructed from the pixel values in the composite image. This histogram is compared element-wise with the histogram from the previous frame, and if the total difference between the two adjacent histograms meets or exceeds the given threshold (`--threshold/-t`), a new scene is triggered.

# Creating New Detection Algorithms

All scene detection algorithms must inherit from [the base `SceneDetector` class](https://scenedetect.com/projects/Manual/en/latest/api/scene_detector.html). Note that the current SceneDetector API is under development and expected to change somewhat before v1.0 is released, so make sure to pin your `scenedetect` dependency to the correct API version (e.g. `scenedetect < 0.6`, `scenedetect < 0.7`, etc...).
Expand Down

0 comments on commit 2450144

Please sign in to comment.