Color Histogram Detector (#295)

* Initial implementation of HistogramDetector. * Added check for color channels * Added tests for detect-hist. * Added documentation for detect-hist. * Add detect-hist to test_cli * Fix formatting * Fix test_histogram_detector * Move detect-hist to new location. * Delete scenedetect/cli/__init__.py Moved to scenedetect/_cli/__init__.py * Add config options for detect-hist * Update config.py * Update __init__.py * Update config.py --------- Co-authored-by: Brandon Castellano <[email protected]>
Breakthrough · Apr 17, 2024 · 2450144 · 2450144
1 parent cc6b863
commit 2450144
Show file tree

Hide file tree

Showing 8 changed files with 304 additions and 16 deletions.
diff --git a/scenedetect/_cli/__init__.py b/scenedetect/_cli/__init__.py
@@ -710,6 +710,52 @@ def detect_threshold_command(
     ctx.obj.add_detector(ThresholdDetector(**detector_args))
 
 
+@click.command('detect-hist', cls=_Command)
+@click.option(
+    '--threshold',
+    '-t',
+    metavar='VAL',
+    type=click.FloatRange(CONFIG_MAP['detect-hist']['threshold'].min_val,
+                          CONFIG_MAP['detect-hist']['threshold'].max_val),
+    default=None,
+    help='Threshold value (float) that the rgb histogram difference must exceed to trigger'
+    ' a new scene. Refer to frame metric hist_diff in stats file.%s' %
+    (USER_CONFIG.get_help_string('detect-hist', 'threshold')))
+@click.option(
+    '--bits',
+    '-b',
+    metavar='NUM',
+    # Each color channel is 8 bits wide, so only 1 to 8 bits can be kept per channel.
+    type=click.IntRange(1, 8),
+    default=None,
+    help='The number of most significant bits to keep when quantizing the RGB color channels.%s'
+    % (USER_CONFIG.get_help_string('detect-hist', 'bits')))
+@click.option(
+    '--min-scene-len',
+    '-m',
+    metavar='TIMECODE',
+    type=click.STRING,
+    default=None,
+    help='Minimum length of any scene. Overrides global min-scene-len (-m) setting.'
+    ' TIMECODE can be specified as exact number of frames, a time in seconds followed by s,'
+    ' or a timecode in the format HH:MM:SS or HH:MM:SS.nnn.%s' %
+    ('' if USER_CONFIG.is_default('detect-hist', 'min-scene-len') else USER_CONFIG.get_help_string(
+        'detect-hist', 'min-scene-len')))
+@click.pass_context
+def detect_hist_command(ctx: click.Context, threshold: Optional[float], bits: Optional[int],
+                        min_scene_len: Optional[str]):
+    """Perform detection of scenes by comparing differences in the RGB histograms of adjacent
+    frames.
+
+    Options left unset on the command line are passed through as None so that the CLI context
+    can resolve them (see `handle_detect_hist`).
+
+    Examples:
+
+        detect-hist
+
+        detect-hist --threshold 20000.0
+    """
+    assert isinstance(ctx.obj, CliContext)
+    ctx.obj.handle_detect_hist(threshold=threshold, bits=bits, min_scene_len=min_scene_len)
+
+
 @click.command('load-scenes', cls=_Command)
 @click.option(
     '--input',

diff --git a/scenedetect/_cli/config.py b/scenedetect/_cli/config.py
@@ -275,6 +275,11 @@ def format(self, timecode: FrameTimecode) -> str:
         'min-scene-len': TimecodeValue(0),
         'threshold': RangeValue(12.0, min_val=0.0, max_val=255.0),
     },
+    'detect-hist': {
+        'bits': 4,
+        'min-scene-len': TimecodeValue(0),
+        'threshold': RangeValue(20000.0, min_val=0.0, max_val=10000000000.0),
+    },
     'load-scenes': {
         'start-col-name': 'Start Frame',
     },

diff --git a/scenedetect/_cli/context.py b/scenedetect/_cli/context.py
@@ -449,6 +449,37 @@ def handle_load_scenes(self, input: AnyStr, start_col_name: Optional[str]):
         self.load_scenes_column_name = self.config.get_value("load-scenes", "start-col-name",
                                                              start_col_name)
 
+    def handle_detect_hist(self, threshold: Optional[float], bits: Optional[int],
+                           min_scene_len: Optional[str]):
+        """Handle `detect-hist` command options.
+
+        Resolves each option against the user config, then constructs a `HistogramDetector`
+        and registers it through `_add_detector`.
+
+        Arguments:
+            threshold: Value given on the command line, or None to use the configured value.
+            bits: Value given on the command line, or None to use the configured value.
+            min_scene_len: Timecode given on the command line, or None to fall back to the
+                `detect-hist` config value (when it is not the default) or otherwise to the
+                global minimum scene length.
+        """
+        self._check_input_open()
+        # `options_processed` is cleared while the detector is set up and restored at the end.
+        saved_options_processed = self.options_processed
+        self.options_processed = False
+
+        if self.drop_short_scenes:
+            # The detector is given a minimum length of 0 when short scenes are dropped.
+            min_scene_len = 0
+        else:
+            if min_scene_len is None:
+                min_scene_len = (
+                    self.min_scene_len.frame_num
+                    if self.config.is_default("detect-hist", "min-scene-len") else
+                    self.config.get_value("detect-hist", "min-scene-len"))
+            min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num
+
+        detector_args = {
+            'threshold': self.config.get_value("detect-hist", "threshold", threshold),
+            'bits': self.config.get_value("detect-hist", "bits", bits),
+            'min_scene_len': min_scene_len,
+        }
+
+        # Log detector args for debugging before we construct it.
+        logger.debug(
+            'Adding detector: HistogramDetector(threshold=%f, bits=%d,'
+            ' min_scene_len=%d)', detector_args['threshold'], detector_args['bits'],
+            detector_args['min_scene_len'])
+
+        self._add_detector(scenedetect.detectors.HistogramDetector(**detector_args))
+
+        self.options_processed = saved_options_processed
+
+
     def handle_export_html(
         self,
         filename: Optional[AnyStr],

diff --git a/scenedetect/detectors/__init__.py b/scenedetect/detectors/__init__.py
@@ -32,6 +32,7 @@
 from scenedetect.detectors.content_detector import ContentDetector
 from scenedetect.detectors.threshold_detector import ThresholdDetector
 from scenedetect.detectors.adaptive_detector import AdaptiveDetector
+from scenedetect.detectors.histogram_detector import HistogramDetector
 
 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
 #                                                                             #
@@ -51,19 +52,6 @@
 #
 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
 #
-# class HistogramDetector(SceneDetector):
-#    """Detects fast cuts via histogram changes between sequential frames.
-#
-#    Detects fast cuts between content (using histogram deltas, much like the
-#    ContentDetector uses HSV colourspace deltas), as well as both fades and
-#    cuts to/from black (using a threshold, much like the ThresholdDetector).
-#    """
-#
-#    def __init__(self):
-#        super(DissolveDetector, self).__init__()
-#
-# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
-#
 # class MotionDetector(SceneDetector):
 #    """Detects motion events in scenes containing a static background.
 #

diff --git a/scenedetect/detectors/histogram_detector.py b/scenedetect/detectors/histogram_detector.py
@@ -0,0 +1,189 @@
+# -*- coding: utf-8 -*-
+#
+#         PySceneDetect: Python-Based Video Scene Detector
+#   ---------------------------------------------------------------
+#     [  Site:   http://www.scenedetect.scenedetect.com/         ]
+#     [  Docs:   http://manual.scenedetect.scenedetect.com/      ]
+#     [  Github: https://github.com/Breakthrough/PySceneDetect/  ]
+#
+# Copyright (C) 2014-2022 Brandon Castellano <http://www.bcastell.com>.
+# PySceneDetect is licensed under the BSD 3-Clause License; see the
+# included LICENSE file, or visit one of the above pages for details.
+#
+""":py:class:`HistogramDetector` compares the difference in the RGB histograms of subsequent
+frames. If the difference exceeds a given threshold, a cut is detected.
+
+This detector is available from the command-line as the `detect-hist` command.
+"""
+
+from typing import List
+
+import numpy
+
+# PySceneDetect Library Imports
+from scenedetect.scene_detector import SceneDetector
+
+
+class HistogramDetector(SceneDetector):
+    """Detects cuts by comparing the quantized color histograms of subsequent frames.
+
+    Each 8-bit color channel is reduced to its `bits` most significant bits, the three channels
+    are packed into a single integer per pixel, and a histogram of those integers is built. If
+    the sum of absolute differences between the histograms of two consecutive frames reaches the
+    given threshold, a cut is detected."""
+
+    METRIC_KEYS = ['hist_diff']
+
+    def __init__(self, threshold: float = 20000.0, bits: int = 4, min_scene_len: int = 15):
+        """
+        Arguments:
+            threshold: Threshold value (float) that the calculated difference between subsequent
+                histograms must reach or exceed to trigger a new scene.
+            bits: Number of most significant bits to keep of the pixel values. Most videos and
+                images are 8-bit rgb (0-255) and the default is to just keep the 4 most significant
+                bits. This compresses the 3*8bit (24bit) image down to 3*4bits (12bits). This makes
+                quantizing the rgb histogram a bit easier and comparisons more meaningful.
+            min_scene_len:  Minimum length of any scene.
+
+        Raises:
+            ValueError: If `bits` is not between 1 and 8 (inclusive).
+        """
+        super().__init__()
+        # Every channel is 8 bits wide, so anything outside 1..8 cannot be quantized meaningfully.
+        if not 1 <= bits <= 8:
+            raise ValueError('bits must be between 1 and 8 for HistogramDetector')
+        self.threshold = threshold
+        self.bits = bits
+        self.min_scene_len = min_scene_len
+        # numpy.histogram treats a sequence as bin *edges*, so N + 1 edges are required to get
+        # one bin for each of the N = 2**(3 * bits) possible composite values.
+        self._hist_bins = range(2**(3 * self.bits) + 1)
+        self._last_hist = None
+        self._last_scene_cut = None
+
+    def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[int]:
+        """First, compress the image according to the self.bits value, then build a histogram for
+        the input frame. Afterward, compare against the previously analyzed frame and check if the
+        difference is large enough to trigger a cut.
+
+        Arguments:
+            frame_num: Frame number of frame that is being passed.
+            frame_img: Decoded frame image (numpy.ndarray) to perform scene
+                detection on.
+
+        Returns:
+            List of frames where scene cuts have been detected. This detector returns either an
+            empty list or a list containing only `frame_num`.
+
+        Raises:
+            ValueError: If `frame_img` is not of dtype uint8 or does not have exactly three
+                color channels.
+        """
+        if frame_img.dtype != numpy.uint8:
+            raise ValueError('Image must be 8-bit rgb for HistogramDetector')
+
+        # Check the rank first so a 2-D (single channel) frame raises the intended ValueError
+        # rather than an IndexError from `shape[2]`.
+        if frame_img.ndim != 3 or frame_img.shape[2] != 3:
+            raise ValueError('Image must have three color channels for HistogramDetector')
+
+        cut_list = []
+
+        # Initialize last scene cut point at the beginning of the frames of interest. Compare
+        # against None explicitly, since frame number 0 is a valid (but falsy) starting point.
+        if self._last_scene_cut is None:
+            self._last_scene_cut = frame_num
+
+        # Quantize the image and separate the color channels
+        quantized_imgs = self._quantize_frame(frame_img=frame_img, bits=self.bits)
+
+        # Perform bit shifting operations and bitwise combine color channels into one array
+        composite_img = self._shift_bits(quantized_imgs=quantized_imgs, bits=self.bits)
+
+        # Create the histogram with a bin for every composite color value
+        hist, _ = numpy.histogram(composite_img, bins=self._hist_bins)
+
+        # We can only start detecting once we have a frame to compare with.
+        if self._last_hist is not None:
+            # Compute histogram difference between frames (sum of absolute bin differences)
+            hist_diff = numpy.sum(numpy.fabs(self._last_hist - hist))
+
+            # Check if a new scene should be triggered
+            if hist_diff >= self.threshold and ((frame_num - self._last_scene_cut)
+                                                >= self.min_scene_len):
+                cut_list.append(frame_num)
+                self._last_scene_cut = frame_num
+
+            # Save stats to a StatsManager if it is being used
+            if self.stats_manager is not None:
+                self.stats_manager.set_metrics(frame_num, {self.METRIC_KEYS[0]: hist_diff})
+
+        self._last_hist = hist
+
+        return cut_list
+
+    def _quantize_frame(self, frame_img, bits):
+        """Quantizes the image based on the number of most significant bits to be preserved.
+
+        Arguments:
+            frame_img: The 8-bit, three channel image of the frame being analyzed.
+            bits: The number of most significant bits to keep during quantization.
+
+        Returns:
+            [red_img, green_img, blue_img]:
+                The three separated color channels of the frame image that have been quantized.
+                Channels are taken from indices 0, 1 and 2 of the last axis in that order.
+                NOTE(review): frames decoded by OpenCV are usually ordered BGR, so the names may
+                not match the actual colors - confirm against the caller.
+        """
+        # 8-bit mask with only the `bits` most significant bits set (e.g. 0b11110000 for 4 bits)
+        bit_value = (0xFF << (8 - bits)) & 0xFF
+
+        # Separate the color channels and cast to int for easier bitwise operations (the wider
+        # type also leaves room for the left shifts done when combining the channels).
+        red_img = frame_img[:, :, 0].astype(int)
+        green_img = frame_img[:, :, 1].astype(int)
+        blue_img = frame_img[:, :, 2].astype(int)
+
+        # Quantize the frame images
+        red_img = red_img & bit_value
+        green_img = green_img & bit_value
+        blue_img = blue_img & bit_value
+
+        return [red_img, green_img, blue_img]
+
+    def _shift_bits(self, quantized_imgs, bits):
+        """Takes care of the bit shifting operations to combine the RGB color
+        channels into a single array.
+
+        After quantization each channel holds its kept bits in positions (8 - bits) to 7. They
+        are moved so that blue occupies the lowest `bits` bits, green the next `bits` bits and
+        red the `bits` bits above that, yielding values in the range 0 to 2**(3 * bits) - 1.
+
+        Arguments:
+            quantized_imgs: A list of the three quantized images of the RGB color channels
+                respectively.
+            bits: The number of most significant bits to use for quantizing the image.
+
+        Returns:
+            composite_img: The resulting array after all bitwise operations.
+        """
+        # First, figure out how much each shift needs to be. A positive value is a shift to the
+        # right and a negative value is a shift to the left.
+        blue_shift = 8 - bits
+        green_shift = 8 - 2 * bits
+        red_shift = 8 - 3 * bits
+
+        # Separate our color channels for ease
+        red_img = quantized_imgs[0]
+        green_img = quantized_imgs[1]
+        blue_img = quantized_imgs[2]
+
+        # Perform the bit shifting for each color
+        red_img = self._shift_images(img=red_img, img_shift=red_shift)
+        green_img = self._shift_images(img=green_img, img_shift=green_shift)
+        blue_img = self._shift_images(img=blue_img, img_shift=blue_shift)
+
+        # Join our rgb arrays together
+        composite_img = numpy.bitwise_or(red_img, numpy.bitwise_or(green_img, blue_img))
+
+        return composite_img
+
+    def _shift_images(self, img, img_shift):
+        """Do bitwise shifting operations for a color channel image checking for shift direction.
+
+        Arguments:
+            img: A quantized image of a single color channel
+            img_shift: How many bits to shift the values of img. A non-negative value shifts to
+                the right; a negative value shifts to the left by its absolute value.
+
+        Returns:
+            shifted_img: The bitwise shifted image.
+        """
+        if img_shift < 0:
+            # Shift left by the magnitude. Adding 8 instead only gives the right amount when the
+            # shift is exactly -4 (i.e. the red channel with bits=4).
+            shifted_img = numpy.left_shift(img, -img_shift)
+        else:
+            shifted_img = numpy.right_shift(img, img_shift)
+
+        return shifted_img
+
+    def is_processing_required(self, frame_num: int) -> bool:
+        """Always returns True: every frame is processed by this detector."""
+        return True
+
+    def get_metrics(self) -> List[str]:
+        """Returns the list of metric keys this detector writes to a StatsManager."""
+        return HistogramDetector.METRIC_KEYS
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -43,7 +43,7 @@
 # TODO: Missing tests for --min-scene-len and --drop-short-scenes.
 
 SCENEDETECT_CMD = 'python -m scenedetect'
-ALL_DETECTORS = ['detect-content', 'detect-threshold', 'detect-adaptive']
+ALL_DETECTORS = ['detect-content', 'detect-threshold', 'detect-adaptive', 'detect-hist']
 ALL_BACKENDS = ['opencv', 'pyav']
 
 DEFAULT_VIDEO_PATH = 'tests/resources/goldeneye.mp4'

diff --git a/tests/test_detectors.py b/tests/test_detectors.py
@@ -24,7 +24,7 @@
 import pytest
 
 from scenedetect import detect, SceneManager, FrameTimecode, StatsManager, SceneDetector
-from scenedetect.detectors import AdaptiveDetector, ContentDetector, ThresholdDetector
+from scenedetect.detectors import AdaptiveDetector, ContentDetector, ThresholdDetector, HistogramDetector
 from scenedetect.backends.opencv import VideoStreamCv2
 
 
@@ -47,6 +47,31 @@ def get_absolute_path(relative_path: str) -> str:
     return abs_path
 
 
+# TODO: Add a test case for this in the fixtures defined below.
+def test_histogram_detector(test_movie_clip):
+    """ Test SceneManager with VideoStreamCv2 and HistogramDetector by comparing the detected
+    scene start frames of a section of the clip against known ground truth. """
+    # Ground truth of start frame for each fast cut in `test_movie_clip`.
+    expected_start_frames = [1199, 1226, 1260, 1281, 1334, 1365, 1590, 1697, 1871]
+
+    video = VideoStreamCv2(test_movie_clip)
+    manager = SceneManager()
+    manager.add_detector(HistogramDetector())
+    manager.auto_downscale = True
+
+    # Only the section between these two timecodes is analyzed.
+    fps = video.frame_rate
+    start_time = FrameTimecode('00:00:50', fps)
+    end_time = FrameTimecode('00:01:19', fps)
+    video.seek(start_time)
+    manager.detect_scenes(video=video, end_time=end_time)
+
+    scenes = manager.get_scene_list()
+    assert len(scenes) == len(expected_start_frames)
+    detected_start_frames = [scene_start.get_frames() for scene_start, _ in scenes]
+    assert expected_start_frames == detected_start_frames
+    # Ensure last scene's end timecode matches the end time we set.
+    assert scenes[-1][1] == end_time
+
+
 @dataclass
 class TestCase:
     __test__ = False
@@ -178,7 +203,7 @@ def test_detect_fades(test_case: TestCase):
 def test_detectors_with_stats(test_video_file):
     """ Test all detectors functionality with a StatsManager. """
     # TODO(v1.0): Parameterize this test case (move fixture from cli to test config).
-    for detector in [ContentDetector, ThresholdDetector, AdaptiveDetector]:
+    for detector in [ContentDetector, ThresholdDetector, AdaptiveDetector, HistogramDetector]:
         video = VideoStreamCv2(test_video_file)
         stats = StatsManager()
         scene_manager = SceneManager(stats_manager=stats)

diff --git a/website/pages/api.md b/website/pages/api.md
@@ -25,6 +25,10 @@ The adaptive content detector (`detect-adaptive`) compares the difference in con
 
 The threshold-based scene detector (`detect-threshold`) is how most traditional scene detection methods work (e.g. the `ffmpeg blackframe` filter), by comparing the intensity/brightness of the current frame with a set threshold, and triggering a scene cut/break when this value crosses the threshold.  In PySceneDetect, this value is computed by averaging the R, G, and B values for every pixel in the frame, yielding a single floating point number representing the average pixel value (from 0.0 to 255.0).
 
+## Histogram Detector
+
+The color histogram detector (`detect-hist`) uses color information to detect fast cuts. The input video for this detector must be in 8-bit color. The detection algorithm separates the three RGB color channels and quantizes each one by discarding all but the given number of most significant bits (`--bits/-b`). The quantized color channels are then bit-shifted and combined into a single composite image, and a histogram is constructed from the pixel values of that composite image. This histogram is compared element-wise with the histogram from the previous frame, and if the total difference between the two adjacent histograms exceeds the given threshold (`--threshold/-t`), a new scene is triggered.
+
 # Creating New Detection Algorithms
 
 All scene detection algorithms must inherit from [the base `SceneDetector` class](https://scenedetect.com/projects/Manual/en/latest/api/scene_detector.html). Note that the current SceneDetector API is under development and expected to change somewhat before v1.0 is released, so make sure to pin your `scenedetect` dependency to the correct API version (e.g. `scenedetect < 0.6`, `scenedetect < 0.7`, etc...).