# `detect-hist` CLI command: collects the histogram detector options and forwards them to the
# CLI context. Every option defaults to None so the context can tell "not given on the command
# line" apart from an explicit value and fall back to the user config.
@click.command('detect-hist', cls=_Command)
@click.option(
    '--threshold',
    '-t',
    metavar='VAL',
    type=click.FloatRange(CONFIG_MAP['detect-hist']['threshold'].min_val,
                          CONFIG_MAP['detect-hist']['threshold'].max_val),
    default=None,
    help='Threshold value (float) that the rgb histogram difference must exceed to trigger'
    ' a new scene. Refer to frame metric hist_diff in stats file.%s' %
    (USER_CONFIG.get_help_string('detect-hist', 'threshold')))
@click.option(
    '--bits',
    '-b',
    metavar='NUM',
    # The detector only accepts 8-bit channels, so between 1 and 8 significant bits can be kept.
    type=click.IntRange(1, 8),
    default=None,
    help='The number of most significant bits to keep when quantizing the RGB color channels.%s'
    % (USER_CONFIG.get_help_string("detect-hist", "bits")))
@click.option(
    '--min-scene-len',
    '-m',
    metavar='TIMECODE',
    type=click.STRING,
    default=None,
    help='Minimum length of any scene. Overrides global min-scene-len (-m) setting.'
    ' TIMECODE can be specified as exact number of frames, a time in seconds followed by s,'
    ' or a timecode in the format HH:MM:SS or HH:MM:SS.nnn.%s' %
    ('' if USER_CONFIG.is_default('detect-hist', 'min-scene-len') else USER_CONFIG.get_help_string(
        'detect-hist', 'min-scene-len')))
@click.pass_context
def detect_hist_command(ctx: click.Context, threshold: Optional[float], bits: Optional[int],
                        min_scene_len: Optional[str]):
    """Perform detection of scenes by comparing differences in the RGB histograms of adjacent
    frames.

    Examples:

        detect-hist

        detect-hist --threshold 20000.0
    """
    # The docstring above doubles as the command's help text, so implementation notes live in
    # comments. The isinstance check narrows `ctx.obj` to the CLI context type before use.
    assert isinstance(ctx.obj, CliContext)
    ctx.obj.handle_detect_hist(threshold=threshold, bits=bits, min_scene_len=min_scene_len)
def handle_detect_hist(self, threshold: Optional[float], bits: Optional[int],
                       min_scene_len: Optional[str]):
    """Resolve the `detect-hist` options and register a HistogramDetector.

    Arguments:
        threshold: Value given on the command line, or None to use the configured value.
        bits: Value given on the command line, or None to use the configured value.
        min_scene_len: Timecode given on the command line, or None. When None, the
            `detect-hist` config value is used unless it is still the default, in which case
            the global minimum scene length applies. Ignored when `drop_short_scenes` is set.
    """
    self._check_input_open()

    # Flag option processing as unfinished while this handler runs; the previous state is put
    # back once the detector has been added.
    previous_state = self.options_processed
    self.options_processed = False

    if self.drop_short_scenes:
        min_scene_len = 0
    else:
        if min_scene_len is None:
            min_scene_len = (
                self.min_scene_len.frame_num
                if self.config.is_default("detect-hist", "min-scene-len") else
                self.config.get_value("detect-hist", "min-scene-len"))
        # Whatever the source of the value, reduce it to a frame count.
        min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num

    detector_kwargs = {
        'threshold': self.config.get_value("detect-hist", "threshold", threshold),
        'bits': self.config.get_value("detect-hist", "bits", bits),
        'min_scene_len': min_scene_len,
    }

    # Emit the resolved arguments before the detector is constructed to ease debugging.
    logger.debug(
        'Adding detector: HistogramDetector(threshold=%f, bits=%d,'
        ' min_scene_len=%d)', detector_kwargs['threshold'], detector_kwargs['bits'],
        detector_kwargs['min_scene_len'])

    self._add_detector(scenedetect.detectors.HistogramDetector(**detector_kwargs))

    self.options_processed = previous_state
class HistogramDetector(SceneDetector):
    """Compares the difference in the RGB histograms of subsequent
    frames. If the difference exceeds a given threshold, a cut is detected.

    Each 8-bit color channel is reduced to its `bits` most significant bits, the three reduced
    channels are packed into a single integer per pixel, and a histogram with one bin per
    possible packed value is built. The metric `hist_diff` is the sum of the absolute bin-wise
    differences between the histograms of the current and the previous frame.
    """

    METRIC_KEYS = ['hist_diff']

    def __init__(self, threshold: float = 20000.0, bits: int = 4, min_scene_len: int = 15):
        """
        Arguments:
            threshold: Threshold value (float) that the calculated difference between subsequent
                histograms must reach to trigger a new scene.
            bits: Number of most significant bits to keep of the pixel values. Most videos and
                images are 8-bit rgb (0-255) and the default is to just keep the 4 most
                significant bits. This compresses the 3*8bit (24bit) image down to 3*4bits
                (12bits). This makes quantizing the rgb histogram a bit easier and comparisons
                more meaningful.
            min_scene_len: Minimum length of any scene, in frames.

        Raises:
            ValueError: If `bits` is not between 1 and 8 (channels are 8 bits wide).
        """
        super().__init__()
        if not 1 <= bits <= 8:
            raise ValueError('bits must be between 1 and 8 for HistogramDetector')
        self.threshold = threshold
        self.bits = bits
        self.min_scene_len = min_scene_len
        # Number of distinct packed values, i.e. one histogram bin per value.
        self._num_bins = 2**(3 * self.bits)
        self._last_hist = None
        self._last_scene_cut = None

    def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[int]:
        """First, compress the image according to the self.bits value, then build a histogram for
        the input frame. Afterward, compare against the previously analyzed frame and check if the
        difference is large enough to trigger a cut.

        Arguments:
            frame_num: Frame number of frame that is being passed.
            frame_img: Decoded frame image (numpy.ndarray) to perform scene
                detection on. Must be an 8-bit image with three color channels.

        Returns:
            List of frames where scene cuts have been detected. There may be 0
            or more frames in the list, and not necessarily the same as frame_num.

        Raises:
            ValueError: If `frame_img` is not uint8 or does not have exactly three channels.
        """
        if frame_img.dtype != numpy.uint8:
            raise ValueError('Image must be 8-bit rgb for HistogramDetector')

        # Check the rank first so a 2-D (single channel) frame raises the intended ValueError
        # rather than an IndexError from `shape[2]`.
        if frame_img.ndim != 3 or frame_img.shape[2] != 3:
            raise ValueError('Image must have three color channels for HistogramDetector')

        # Initialize last scene cut point at the beginning of the frames of interest. Compare
        # against None explicitly: frame number 0 is a valid value and must not be overwritten.
        if self._last_scene_cut is None:
            self._last_scene_cut = frame_num

        # Quantize the image and separate the color channels
        quantized_imgs = self._quantize_frame(frame_img=frame_img, bits=self.bits)

        # Perform bit shifting operations and bitwise combine color channels into one array
        composite_img = self._shift_bits(quantized_imgs=quantized_imgs, bits=self.bits)

        # Count every packed value in its own bin. (Passing `range(n)` to numpy.histogram would
        # be interpreted as n bin *edges*, i.e. only n-1 bins with the two largest values merged.)
        hist = numpy.bincount(composite_img.ravel(), minlength=self._num_bins)

        cut_list = []

        # We can only start detecting once we have a frame to compare with.
        if self._last_hist is not None:
            # Compute histogram difference between frames
            hist_diff = numpy.sum(numpy.fabs(self._last_hist - hist))

            # Check if a new scene should be triggered
            if hist_diff >= self.threshold and ((frame_num - self._last_scene_cut)
                                                >= self.min_scene_len):
                cut_list.append(frame_num)
                self._last_scene_cut = frame_num

            # Save stats to a StatsManager if it is being used
            if self.stats_manager is not None:
                self.stats_manager.set_metrics(frame_num, {self.METRIC_KEYS[0]: hist_diff})

        self._last_hist = hist

        return cut_list

    def _quantize_frame(self, frame_img: numpy.ndarray, bits: int) -> List[numpy.ndarray]:
        """Quantizes the image based on the number of most significant bits to be preserved.

        Arguments:
            frame_img: The 8-bit rgb image of the frame being analyzed.
            bits: The number of most significant bits to keep during quantization.

        Returns:
            [red_img, green_img, blue_img]:
                The three separated color channels of the frame image that have been quantized.
                The kept bits stay in their original (high) positions; the low bits are zeroed.
        """
        # Mask with the top `bits` bits of a byte set, e.g. bits=4 -> 0b11110000.
        bit_value = ((1 << bits) - 1) << (8 - bits)

        # Channels 0, 1, 2 are referred to as R, G, B throughout this class.
        # NOTE(review): frames decoded by OpenCV are presumably BGR; swapping channel roles only
        # permutes histogram bins, which leaves the summed difference unchanged - confirm.
        # Cast to int so the later left shifts cannot overflow 8 bits.
        return [frame_img[:, :, channel].astype(int) & bit_value for channel in range(3)]

    def _shift_bits(self, quantized_imgs: List[numpy.ndarray], bits: int) -> numpy.ndarray:
        """Takes care of the bit shifting operations to combine the RGB color
        channels into a single array.

        After shifting, blue occupies bits [0, bits), green [bits, 2*bits) and red
        [2*bits, 3*bits), so every packed value is below 2**(3*bits).

        Arguments:
            quantized_imgs: A list of the three quantized images of the RGB color channels
                respectively.
            bits: The number of most significant bits to use for quantizing the image.

        Returns:
            composite_img: The resulting array after all bitwise operations.
        """
        # Each quantized channel currently sits in bits [8-bits, 8). These are the right-shift
        # amounts needed to move it to its target position; negative means shift left instead.
        blue_shift = 8 - bits
        green_shift = 8 - 2 * bits
        red_shift = 8 - 3 * bits

        red_img, green_img, blue_img = quantized_imgs

        # Perform the bit shifting for each color
        red_img = self._shift_images(img=red_img, img_shift=red_shift)
        green_img = self._shift_images(img=green_img, img_shift=green_shift)
        blue_img = self._shift_images(img=blue_img, img_shift=blue_shift)

        # Join our rgb arrays together
        return numpy.bitwise_or(red_img, numpy.bitwise_or(green_img, blue_img))

    def _shift_images(self, img: numpy.ndarray, img_shift: int) -> numpy.ndarray:
        """Do bitwise shifting operations for a color channel image checking for shift direction.

        Arguments:
            img: A quantized image of a single color channel
            img_shift: How many bits to shift the values of img to the right. If the value is
                negative, the image is shifted to the left by its absolute value instead.

        Returns:
            shifted_img: The bitwise shifted image.
        """
        if img_shift < 0:
            # Shift left by the magnitude. (Adding 8 to the negative amount only happens to give
            # the right result for bits=4 and misplaces the channel for every other value.)
            return numpy.left_shift(img, -img_shift)
        return numpy.right_shift(img, img_shift)

    def is_processing_required(self, frame_num: int) -> bool:
        """Always True: this implementation never skips building the histogram for a frame."""
        return True

    def get_metrics(self) -> List[str]:
        """Return the list of metric keys this detector writes to a StatsManager."""
        return HistogramDetector.METRIC_KEYS
# TODO: Add a test case for this in the fixtures defined below.
def test_histogram_detector(test_movie_clip):
    """ Test SceneManager with VideoStreamCv2 and HistogramDetector. """
    # Ground truth of start frame for each fast cut in `test_movie_clip`.
    expected_start_frames = [1199, 1226, 1260, 1281, 1334, 1365, 1590, 1697, 1871]

    video = VideoStreamCv2(test_movie_clip)
    scene_manager = SceneManager()
    scene_manager.add_detector(HistogramDetector())
    scene_manager.auto_downscale = True

    # Only the window from 00:00:50 to 00:01:19 of the clip is analyzed.
    frame_rate = video.frame_rate
    start_time = FrameTimecode('00:00:50', frame_rate)
    end_time = FrameTimecode('00:01:19', frame_rate)
    video.seek(start_time)
    scene_manager.detect_scenes(video=video, end_time=end_time)

    scenes = scene_manager.get_scene_list()
    assert len(scenes) == len(expected_start_frames)
    assert expected_start_frames == [scene_start.get_frames() for scene_start, _ in scenes]

    # Ensure last scene's end timecode matches the end time we set.
    last_scene = scenes[-1]
    assert last_scene[1] == end_time
+ assert scene_list[-1][1] == end_time + + @dataclass class TestCase: """Properties for detector test cases.""" @@ -177,7 +202,7 @@ def test_detect_fades(test_case: TestCase): def test_detectors_with_stats(test_video_file): """ Test all detectors functionality with a StatsManager. """ # TODO(v1.0): Parameterize this test case (move fixture from cli to test config). - for detector in [ContentDetector, ThresholdDetector, AdaptiveDetector]: + for detector in [ContentDetector, ThresholdDetector, AdaptiveDetector, HistogramDetector]: video = VideoStreamCv2(test_video_file) stats = StatsManager() scene_manager = SceneManager(stats_manager=stats) diff --git a/website/pages/api.md b/website/pages/api.md index 517d4175..6620ce5a 100644 --- a/website/pages/api.md +++ b/website/pages/api.md @@ -25,6 +25,10 @@ The adaptive content detector (`detect-adaptive`) compares the difference in con The threshold-based scene detector (`detect-threshold`) is how most traditional scene detection methods work (e.g. the `ffmpeg blackframe` filter), by comparing the intensity/brightness of the current frame with a set threshold, and triggering a scene cut/break when this value crosses the threshold. In PySceneDetect, this value is computed by averaging the R, G, and B values for every pixel in the frame, yielding a single floating point number representing the average pixel value (from 0.0 to 255.0). +## Histogram Detector + +The color histogram detector uses color information to detect fast cuts. The input video for this detector must be in 8-bit color. The detection algorithm consists of separating the three RGB color channels and then quantizing them by eliminating all but the given number of most significant bits (`--bits/-b`). The resulting quantized color channels are then bit shifted and joined together into a new, composite image. A histogram is then constructed from the pixel values in the new, composite image. 
This histogram is compared element-wise with the histogram from the previous frame and if the total difference between the two adjacent histograms exceeds the given threshold (`--threshold/-t`), then a new scene is triggered. + # Creating New Detection Algorithms All scene detection algorithms must inherit from [the base `SceneDetector` class](https://scenedetect.com/projects/Manual/en/latest/api/scene_detector.html). Note that the current SceneDetector API is under development and expected to change somewhat before v1.0 is released, so make sure to pin your `scenedetect` dependency to the correct API version (e.g. `scenedetect < 0.6`, `scenedetect < 0.7`, etc...).