Skip to content

Commit

Permalink
Implement the Chinese Whispers clustering algorithm in native PHP
Browse files Browse the repository at this point in the history
To the happiness of many (Issue #690, #688, #687, #685, #649, #632,
extension, but it goes without saying that its use is still highly
recommended.

You will understand that it is slower. However, I must admit that with
JIT enabled it is quite acceptable, and this is the only reason why I
decided to publish it.

It is still experimental: it works, but it has known problems. For
example, it does not seem to converge to stable clusters. Once I fix
this, it will probably be even slower.
  • Loading branch information
matiasdelellis committed Aug 22, 2023
1 parent 76fe598 commit 1f80f52
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 26 deletions.
6 changes: 2 additions & 4 deletions appinfo/info.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
- **🚀 Build your own thing:** FaceRecognition app is just a basic building block. Through FaceRecognition API, you can build your advanced scenarios - automatically add tags to images, connect contacts and persons, share images from specific person… We want to hear your ideas!
]]>
</description>
<version>0.9.20</version>
<version>0.9.30</version>
<licence>agpl</licence>
<author>Matias De lellis</author>
<author>Branko Kokanovic</author>
Expand All @@ -34,9 +34,7 @@
<screenshot>https://matiasdelellis.github.io/img/facerecognition/facerecognition-assign-initial-name.jpeg</screenshot>
<dependencies>
<php min-version="8.0" max-version="8.2" />
<lib>pdlib</lib>
<lib>bz2</lib>
<nextcloud min-version="26" max-version="27"/>
<nextcloud min-version="27" max-version="27"/>
</dependencies>
<repair-steps>
<uninstall>
Expand Down
16 changes: 5 additions & 11 deletions lib/BackgroundJob/Tasks/CheckRequirementsTask.php
Original file line number Diff line number Diff line change
Expand Up @@ -90,29 +90,23 @@ public function execute(FaceRecognitionContext $context) {
$phpMemory = MemoryLimits::getPhpMemory();
$this->logDebug("PHP Memory Limit: " . ($phpMemory > 0 ? $phpMemory : "Unknown"));

$this->logDebug("Clustering backend: " . (Requirements::pdlibLoaded() ? "pdlib" : "PHP (Not recommended."));

if ($this->imaginaryHelper->isEnabled()) {
$this->logDebug("Backend of images: Imaginary");
$this->logDebug("Image Backend: Imaginary");
$version = $this->imaginaryHelper->getVersion();
if ($version) {
$this->logDebug("Imaginary version: " . $version);
} else {
$imaginaryUrl = $this->imaginaryHelper->getUrl();
$error_message =
"An Imaginary service (" . $imaginaryUrl . ") was configured to manage temporary images, but it is inaccessible." .
"An Imaginpary service (" . $imaginaryUrl . ") was configured to manage temporary images, but it is inaccessible." .
"Check out the service, or set the 'preview_imaginary_url' key appropriately.";
$this->logInfo($error_message);
return false;
}
} else {
$this->logDebug("Backend of images: Imagick");
}

if (!Requirements::pdlibLoaded()) {
$error_message =
"The PDlib PHP extension is not loaded. Cannot continue without it." .
"Please read the documentation again about how to install the application: https://github.com/matiasdelellis/facerecognition/wiki/Installation";
$this->logInfo($error_message);
return false;
$this->logDebug("Image Backend: Imagick");
}

if (!Requirements::hasEnoughMemory()) {
Expand Down
32 changes: 25 additions & 7 deletions lib/BackgroundJob/Tasks/CreateClustersTask.php
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?php
/**
* @copyright Copyright (c) 2017-2020 Matias De lellis <[email protected]>
* @copyright Copyright (c) 2017-2023 Matias De lellis <[email protected]>
* @copyright Copyright (c) 2018, Branko Kokanovic <[email protected]>
*
* @author Branko Kokanovic <[email protected]>
Expand Down Expand Up @@ -33,6 +33,9 @@
use OCA\FaceRecognition\Db\PersonMapper;

use OCA\FaceRecognition\Helper\Euclidean;
use OCA\FaceRecognition\Helper\Requirements;

use OCA\FaceRecognition\Clusterer\ChineseWhispers;

use OCA\FaceRecognition\Service\SettingsService;
/**
Expand Down Expand Up @@ -282,10 +285,9 @@ private function getNewClusters(array $faces): array {
// Clustering parameters
$sensitivity = $this->settingsService->getSensitivity();

// Create edges for chinese whispers
$edges = array();

if (version_compare(phpversion('pdlib'), '1.0.2', '>=')) {
if (!Requirements::pdlibLoaded()) {
// Create edges (neighbors) for Chinese Whispers
$edges = array();
$faces_count = count($faces);
for ($i = 0; $i < $faces_count; $i++) {
$face1 = $faces[$i];
Expand All @@ -304,8 +306,14 @@ private function getNewClusters(array $faces): array {
}
}
}

// Given the edges get the list of labels (found clusters) for each face.
$newChineseClustersByIndex = dlib_chinese_whispers($edges);
} else {
// Create edges (neighbors) for Chinese Whispers
$edges = array();
$faces_count = count($faces);

for ($i = 0; $i < $faces_count; $i++) {
$face1 = $faces[$i];
if (!isset($face1->descriptor)) {
Expand All @@ -323,17 +331,27 @@ private function getNewClusters(array $faces): array {
}
}
}

// The clustering algorithm actually expects ordered lists.
$oedges = [];
ChineseWhispers::convert_unordered_to_ordered($edges, $oedges);
usort($oedges, function($a, $b) {
if ($a[0] === $b[0]) return $a[1] - $b[1];
return $a[0] - $b[0];
});

// Given the edges get the list of labels (found clusters) for each face.
$newChineseClustersByIndex = [];
ChineseWhispers::predict($oedges, $newChineseClustersByIndex);
}

$newChineseClustersByIndex = dlib_chinese_whispers($edges);
$newClusters = array();
for ($i = 0, $c = count($newChineseClustersByIndex); $i < $c; $i++) {
if (!isset($newClusters[$newChineseClustersByIndex[$i]])) {
$newClusters[$newChineseClustersByIndex[$i]] = array();
}
$newClusters[$newChineseClustersByIndex[$i]][] = $faces[$i]->id;
}

return $newClusters;
}

Expand Down
155 changes: 155 additions & 0 deletions lib/Clusterer/ChineseWhispers.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
<?php
declare(strict_types=1);
/**
* @copyright Copyright (c) 2023, Matias De lellis
*
* @author Matias De lellis <[email protected]>
*
* @license AGPL-3.0-or-later
*
* This code is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License, version 3,
* along with this program. If not, see <http://www.gnu.org/licenses/>
*
*/

namespace OCA\FaceRecognition\Clusterer;


/**
* This class implements the graph clustering algorithm described in the
* paper: Chinese Whispers - an Efficient Graph Clustering Algorithm and its
* Application to Natural Language Processing Problems by Chris Biemann.
*
* In particular, it tries to be a shameless copy of the original dlib
* implementation.
* - https://github.com/davisking/dlib/blob/master/dlib/clustering/chinese_whispers.h
*/
class ChineseWhispers {

    /**
     * Cluster the nodes of a graph by assigning a label to each node.
     *
     * Every node starts with its own label. Then, repeatedly, a random node
     * adopts the label that occurs most often among its neighbors. Finally the
     * surviving labels are renumbered into the contiguous range 0..k-1.
     *
     * Node selection uses random_int(), so two runs over the same graph are
     * not guaranteed to produce the same labelling.
     *
     * @param array $edges Directed edges as [from, to] pairs. Each undirected
     *        edge must appear in both directions, and the list must be sorted
     *        by the "from" index (see convert_unordered_to_ordered()).
     *        The array is only read.
     * @param array $labels Output. Overwritten with one label per node, indexed
     *        by node id (0 .. highest index used in $edges).
     * @param int $num_iterations Number of relabelling steps per node.
     *
     * @return int The number of distinct clusters found (0 for an empty graph).
     */
    public static function predict(array &$edges, array &$labels, int $num_iterations = 100): int
    {
        $labels = [];
        if (count($edges) === 0) {
            return 0;
        }

        $neighbors = [];
        self::find_neighbor_ranges($edges, $neighbors);

        // The node count never changes below, so compute it once instead of
        // calling count() on every pass of the main loop.
        $num_nodes = count($neighbors);

        // Initialize the labels, each node gets a different label.
        for ($i = 0; $i < $num_nodes; ++$i) {
            $labels[$i] = $i;
        }

        $total_steps = $num_nodes * $num_iterations;
        for ($iter = 0; $iter < $total_steps; ++$iter) {
            // Pick a random node.
            $idx = random_int(0, $num_nodes - 1);

            // Count how many times each label happens amongst our neighbors.
            // $neighbors[$idx] is the half-open range [begin, end) of this
            // node's outgoing edges inside $edges.
            $labels_to_counts = [];
            [$begin, $end] = $neighbors[$idx];
            for ($i = $begin; $i < $end; ++$i) {
                $neighbor_label = $labels[$edges[$i][1]];
                $labels_to_counts[$neighbor_label] = ($labels_to_counts[$neighbor_label] ?? 0) + 1;
            }

            // Find the most common label. A node without neighbors keeps its
            // current label; ties go to the label that was counted first.
            $best_score = PHP_INT_MIN;
            $best_label = $labels[$idx];
            foreach ($labels_to_counts as $label => $score) {
                if ($score > $best_score) {
                    $best_score = $score;
                    $best_label = $label;
                }
            }

            $labels[$idx] = $best_label;
        }

        // Remap the labels into a contiguous range. First we find the
        // mapping: labels are numbered in order of first appearance.
        $label_remap = [];
        foreach ($labels as $label) {
            if (!isset($label_remap[$label])) {
                $label_remap[$label] = count($label_remap);
            }
        }

        // Now apply the mapping to all the labels.
        for ($i = 0; $i < $num_nodes; ++$i) {
            $labels[$i] = $label_remap[$labels[$i]];
        }

        return count($label_remap);
    }

    /**
     * Build, for every node, the range of its outgoing edges within $edges.
     *
     * After the call, [$neighbors[$i][0], $neighbors[$i][1]) is the half-open
     * range of positions in $edges holding node $i's edges. Nodes without
     * outgoing edges get the empty range [0, 0].
     *
     * The result is only meaningful when $edges is sorted by the "from" index.
     *
     * @param array $edges List of [from, to] pairs. Only read.
     * @param array $neighbors Output. Any previous content is discarded.
     */
    public static function find_neighbor_ranges(&$edges, &$neighbors): void
    {
        $num_nodes = self::max_index_plus_one($edges);

        // Always start from a clean slate: entries left over from the caller
        // would otherwise survive and inflate the node count in predict().
        if ($num_nodes === 0) {
            $neighbors = [];
            return;
        }
        $neighbors = array_fill(0, $num_nodes, [0, 0]);

        $num_edges = count($edges);
        $cur_node = 0;
        $start_idx = 0;
        for ($i = 0; $i < $num_edges; ++$i) {
            // Loose comparison kept on purpose so numeric-string indices
            // behave exactly as they did before.
            if ($edges[$i][0] != $cur_node) {
                // The "from" index changed: close the range of the previous
                // node and open a new one for the current node.
                $neighbors[$cur_node] = [$start_idx, $i];
                $start_idx = $i;
                $cur_node = $edges[$i][0];
            }
        }

        // Close the range of the last node seen.
        $neighbors[$cur_node] = [$start_idx, $num_edges];
    }

    /**
     * Return the number of nodes implied by a list of index pairs.
     *
     * @param array $pairs List of [index1, index2] pairs.
     *
     * @return int The largest index found in either position plus one, or 0
     *         when $pairs is empty.
     */
    public static function max_index_plus_one($pairs): int
    {
        if (count($pairs) === 0) {
            return 0;
        }

        $max_idx = 0;
        foreach ($pairs as $pair) {
            if ($pair[0] > $max_idx) {
                $max_idx = $pair[0];
            }
            if ($pair[1] > $max_idx) {
                $max_idx = $pair[1];
            }
        }

        return $max_idx + 1;
    }

    /**
     * Expand undirected edges into directed edges.
     *
     * Every pair [a, b] yields [a, b] and, unless a equals b, also [b, a].
     * The output keeps the input order and is NOT sorted.
     *
     * @param array $edges List of [a, b] pairs. Only read.
     * @param array $out_edges Output. Overwritten with the directed pairs.
     */
    public static function convert_unordered_to_ordered(&$edges, &$out_edges): void
    {
        $out_edges = [];
        foreach ($edges as $edge) {
            $out_edges[] = [$edge[0], $edge[1]];
            // A self-loop is already symmetric; do not duplicate it.
            if ($edge[0] != $edge[1]) {
                $out_edges[] = [$edge[1], $edge[0]];
            }
        }
    }
}
4 changes: 0 additions & 4 deletions lib/Model/ExternalModel/ExternalModel.php
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,6 @@ public function isInstalled(): bool {
}

public function meetDependencies(string &$error_message): bool {
if (!extension_loaded('pdlib')) {
$error_message = "The PDlib PHP extension is not loaded.";
return false;
}
if (is_null($this->settingsService->getExternalModelUrl())) {
$error_message = "You still need to configure the URL of the service running the model.";
return false;
Expand Down

0 comments on commit 1f80f52

Please sign in to comment.