diff --git a/appinfo/info.xml b/appinfo/info.xml index 7cffd91a..b59e75bd 100644 --- a/appinfo/info.xml +++ b/appinfo/info.xml @@ -16,7 +16,7 @@ - **🚀 Build your own thing:** FaceRecognition app is just a basic building block. Through FaceRecognition API, you can build your advanced scenarios - automatically add tags to images, connect contacts and persons, share images from specific person… We want to hear your ideas! ]]> - 0.9.20 + 0.9.30 agpl Matias De lellis Branko Kokanovic @@ -34,9 +34,7 @@ https://matiasdelellis.github.io/img/facerecognition/facerecognition-assign-initial-name.jpeg - pdlib - bz2 - + diff --git a/lib/BackgroundJob/Tasks/CheckRequirementsTask.php b/lib/BackgroundJob/Tasks/CheckRequirementsTask.php index cdd048da..745a3d01 100644 --- a/lib/BackgroundJob/Tasks/CheckRequirementsTask.php +++ b/lib/BackgroundJob/Tasks/CheckRequirementsTask.php @@ -90,8 +90,10 @@ public function execute(FaceRecognitionContext $context) { $phpMemory = MemoryLimits::getPhpMemory(); $this->logDebug("PHP Memory Limit: " . ($phpMemory > 0 ? $phpMemory : "Unknown")); + $this->logDebug("Clustering backend: " . (Requirements::pdlibLoaded() ? "pdlib" : "PHP (Not recommended.")); + if ($this->imaginaryHelper->isEnabled()) { - $this->logDebug("Backend of images: Imaginary"); + $this->logDebug("Image Backend: Imaginary"); $version = $this->imaginaryHelper->getVersion(); if ($version) { $this->logDebug("Imaginary version: " . $version); @@ -104,15 +106,7 @@ public function execute(FaceRecognitionContext $context) { return false; } } else { - $this->logDebug("Backend of images: Imagick"); - } - - if (!Requirements::pdlibLoaded()) { - $error_message = - "The PDlib PHP extension is not loaded. Cannot continue without it." . 
-				"Please read the documentation again about how to install the application: https://github.com/matiasdelellis/facerecognition/wiki/Installation";
-			$this->logInfo($error_message);
-			return false;
+			$this->logDebug("Image Backend: Imagick");
 		}
 
 		if (!Requirements::hasEnoughMemory()) {
diff --git a/lib/BackgroundJob/Tasks/CreateClustersTask.php b/lib/BackgroundJob/Tasks/CreateClustersTask.php
index 8eee41f1..dc04c14e 100644
--- a/lib/BackgroundJob/Tasks/CreateClustersTask.php
+++ b/lib/BackgroundJob/Tasks/CreateClustersTask.php
@@ -1,6 +1,6 @@
+ * @copyright Copyright (c) 2017-2023 Matias De lellis
  * @copyright Copyright (c) 2018, Branko Kokanovic
  *
  * @author Branko Kokanovic
@@ -33,6 +33,9 @@
 use OCA\FaceRecognition\Db\PersonMapper;
 
 use OCA\FaceRecognition\Helper\Euclidean;
+use OCA\FaceRecognition\Helper\Requirements;
+
+use OCA\FaceRecognition\Clusterer\ChineseWhispers;
 use OCA\FaceRecognition\Service\SettingsService;
 
 /**
@@ -282,10 +285,9 @@ private function getNewClusters(array $faces): array {
 		// Clustering parameters
 		$sensitivity = $this->settingsService->getSensitivity();
 
-		// Create edges for chinese whispers
-		$edges = array();
-
-		if (version_compare(phpversion('pdlib'), '1.0.2', '>=')) {
+		if (Requirements::pdlibLoaded()) { // native backend: this branch ends in dlib_chinese_whispers(), which needs pdlib
+			// Create edges (neighbors) for Chinese Whispers
+			$edges = array();
 			$faces_count = count($faces);
 			for ($i = 0; $i < $faces_count; $i++) {
 				$face1 = $faces[$i];
@@ -304,8 +306,14 @@ private function getNewClusters(array $faces): array {
 					}
 				}
 			}
+
+			// Given the edges get the list of labels (found clusters) for each face.
+			$newChineseClustersByIndex = dlib_chinese_whispers($edges);
 		} else {
+			// pdlib is not loaded: create edges (neighbors) for the PHP implementation of Chinese Whispers
+			$edges = array();
 			$faces_count = count($faces);
+
 			for ($i = 0; $i < $faces_count; $i++) {
 				$face1 = $faces[$i];
 				if (!isset($face1->descriptor)) {
@@ -323,9 +331,20 @@ private function getNewClusters(array $faces): array {
 					}
 				}
 			}
+
+			// The PHP clusterer expects both directions of every edge, sorted by (from, to).
+			$oedges = [];
+			ChineseWhispers::convert_unordered_to_ordered($edges, $oedges);
+			usort($oedges, function($a, $b) {
+				if ($a[0] === $b[0]) return $a[1] <=> $b[1];
+				return $a[0] <=> $b[0];
+			});
+
+			// Given the edges get the list of labels (found clusters) for each face.
+			$newChineseClustersByIndex = [];
+			ChineseWhispers::predict($oedges, $newChineseClustersByIndex);
 		}
 
-		$newChineseClustersByIndex = dlib_chinese_whispers($edges);
 		$newClusters = array();
 		for ($i = 0, $c = count($newChineseClustersByIndex); $i < $c; $i++) {
 			if (!isset($newClusters[$newChineseClustersByIndex[$i]])) {
@@ -333,7 +352,6 @@ private function getNewClusters(array $faces): array {
 			}
 			$newClusters[$newChineseClustersByIndex[$i]][] = $faces[$i]->id;
 		}
-
 		return $newClusters;
 	}
 
diff --git a/lib/Clusterer/ChineseWhispers.php b/lib/Clusterer/ChineseWhispers.php
new file mode 100644
index 00000000..700b584b
--- /dev/null
+++ b/lib/Clusterer/ChineseWhispers.php
@@ -0,0 +1,155 @@
+ *
+ * @license AGPL-3.0-or-later
+ *
+ * This code is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License, version 3,
+ * along with this program. If not, see
+ *
+ */
+
+namespace OCA\FaceRecognition\Clusterer;
+
+
+/**
+ * This class implements the graph clustering algorithm described in the
+ * paper: Chinese Whispers - an Efficient Graph Clustering Algorithm and its
+ * Application to Natural Language Processing Problems by Chris Biemann.
+ *
+ * In particular, it tries to be a shameless copy of the original dlib
+ * implementation.
+ * - https://github.com/davisking/dlib/blob/master/dlib/clustering/chinese_whispers.h
+ */
+class ChineseWhispers {
+
+	/**
+	 * Cluster the nodes of a graph by assigning a label to each of them.
+	 *
+	 * Every node starts with its own label. Then, (number of nodes) * $num_iterations
+	 * times, a randomly picked node adopts the label that is most frequent among
+	 * its neighbors. Finally the labels are renumbered into the contiguous range
+	 * 0..k-1, in order of first appearance.
+	 *
+	 * @param array $edges list of [from, to] node index pairs. It must contain both
+	 * directions of every edge and be sorted by "from", so that all the edges of a
+	 * node are adjacent. It is only read.
+	 * @param array $labels output. On return $labels[$node] is the cluster of $node.
+	 * @param int $num_iterations number of label updates performed per node.
+	 *
+	 * @return int the number of clusters found, 0 when there are no edges.
+	 */
+	public static function predict(array &$edges, array &$labels, int $num_iterations = 100): int
+	{
+		$labels = [];
+		if (count($edges) == 0)
+			return 0;
+
+		$neighbors = [];
+		self::find_neighbor_ranges($edges, $neighbors);
+
+		// The node count is loop invariant, so compute it (and the budget) once.
+		$num_nodes = count($neighbors);
+		$num_updates = $num_nodes * $num_iterations;
+
+		// Initialize the labels, each node gets a different label.
+		for ($i = 0; $i < $num_nodes; ++$i)
+			$labels[$i] = $i;
+
+		for ($iter = 0; $iter < $num_updates; ++$iter)
+		{
+			// Pick a random node.
+			$idx = random_int(0, $num_nodes - 1);
+
+			// Count how many times each label happens amongst our neighbors.
+			$labels_to_counts = [];
+			$end = $neighbors[$idx][1];
+			for ($i = $neighbors[$idx][0]; $i != $end; ++$i)
+			{
+				$label = $labels[$edges[$i][1]];
+				$labels_to_counts[$label] = ($labels_to_counts[$label] ?? 0) + 1;
+			}
+
+			// Find the most common label. A node without edges keeps its own label.
+			$best_score = PHP_INT_MIN;
+			$best_label = $labels[$idx];
+			foreach ($labels_to_counts as $candidate => $score)
+			{
+				if ($score > $best_score)
+				{
+					$best_score = $score;
+					$best_label = $candidate;
+				}
+			}
+
+			$labels[$idx] = $best_label;
+		}
+
+		// Remap the labels into a contiguous range. First we find the
+		// mapping: each distinct label gets the next free id.
+		$label_remap = [];
+		foreach ($labels as $label)
+		{
+			if (!isset($label_remap[$label]))
+				$label_remap[$label] = count($label_remap);
+		}
+		// now apply the mapping to all the labels.
+		foreach ($labels as $node => $label)
+		{
+			$labels[$node] = $label_remap[$label];
+		}
+
+		return count($label_remap);
+	}
+
+	/**
+	 * Compute, for every node, the range of $edges that holds its edges.
+	 *
+	 * On return [$neighbors[$i][0], $neighbors[$i][1]) is the half-open range of
+	 * positions within $edges whose first element is node $i. Nodes that never
+	 * appear as first element get the empty range [0, 0].
+	 *
+	 * @param array $edges list of [from, to] pairs sorted by "from". Only read.
+	 * @param array $neighbors output. Any previous content is discarded.
+	 */
+	public static function find_neighbor_ranges (&$edges, &$neighbors): void {
+		// Start from a clean list so that stale entries of the caller can not leak in.
+		$neighbors = [];
+		$num_nodes = self::max_index_plus_one($edges);
+		for ($i = 0; $i < $num_nodes; ++$i) $neighbors[$i] = [0, 0];
+
+		$num_edges = count($edges);
+		$cur_node = 0;
+		$start_idx = 0;
+		for ($i = 0; $i < $num_edges; ++$i)
+		{
+			// A new "from" node closes the range of the previous one.
+			if ($edges[$i][0] != $cur_node)
+			{
+				$neighbors[$cur_node] = [$start_idx, $i];
+				$start_idx = $i;
+				$cur_node = $edges[$i][0];
+			}
+		}
+		// Close the range of the last node seen.
+		if (count($neighbors) !== 0)
+			$neighbors[$cur_node] = [$start_idx, $num_edges];
+	}
+
+	/**
+	 * Get the number of nodes referenced by a list of [from, to] pairs.
+	 *
+	 * @param array $pairs list of pairs of node indexes.
+	 *
+	 * @return int the biggest index found on either side of a pair plus one,
+	 * or 0 when the list is empty.
+	 */
+	public static function max_index_plus_one ($pairs): int {
+		$num_pairs = count($pairs);
+		if ($num_pairs === 0)
+			return 0;
+
+		$max_idx = 0;
+		for ($i = 0; $i < $num_pairs; ++$i)
+		{
+			if ($pairs[$i][0] > $max_idx)
+				$max_idx = $pairs[$i][0];
+			if ($pairs[$i][1] > $max_idx)
+				$max_idx = $pairs[$i][1];
+		}
+		return $max_idx + 1;
+	}
+
+	/**
+	 * Expand a list of undirected edges into a list of directed ones.
+	 *
+	 * Each [a, b] is emitted as [a, b] followed by [b, a]. The result keeps the
+	 * input order, so it still has to be sorted before calling predict().
+	 *
+	 * @param array $edges list of [a, b] pairs. Only read.
+	 * @param array $out_edges output. Any previous content is discarded.
+	 */
+	public static function convert_unordered_to_ordered (&$edges, &$out_edges): void
+	{
+		$out_edges = [];
+		$num_edges = count($edges);
+		for ($i = 0; $i < $num_edges; ++$i)
+		{
+			$out_edges[] = [$edges[$i][0], $edges[$i][1]];
+			// A self loop is its own reverse, so it is emitted only once.
+			if ($edges[$i][0] != $edges[$i][1])
+				$out_edges[] = [$edges[$i][1], $edges[$i][0]];
+		}
+	}
+}
diff --git a/lib/Model/ExternalModel/ExternalModel.php b/lib/Model/ExternalModel/ExternalModel.php
index b5c44df7..8b52633e 100644
--- a/lib/Model/ExternalModel/ExternalModel.php
+++ b/lib/Model/ExternalModel/ExternalModel.php
@@ -87,10 +87,6 @@ public function isInstalled(): bool {
 	}
 
 	public function meetDependencies(string &$error_message): bool {
-		if (!extension_loaded('pdlib')) {
-			$error_message = "The PDlib PHP extension is not loaded.";
-			return false;
-		}
 		if (is_null($this->settingsService->getExternalModelUrl())) {
 			$error_message = "You still need to configure the URL of the service running the model.";
 			return false;