First commit

uug-ai · May 13, 2024 · 3e5a7ee · 3e5a7ee
1 parent 3ff96af
commit 3e5a7ee
Show file tree

Hide file tree

Showing 7 changed files with 585 additions and 0 deletions.
diff --git a/ml/README.md b/ml/README.md
@@ -0,0 +1,18 @@
+# Face Recognition and Embeddings Database
+This repository contains the back-end ML component of the larger project — the integration of face recognition technology together with a robust vector database.
+
+Within this repository, we present:
+
+* **DeepFace**: representing a breakthrough in face recognition technology. Leveraging deep learning techniques, DeepFace extracts rich, high-dimensional features from facial images, enabling precise identification and verification tasks.
+* **Qdrant**: standing out as a robust vector database optimized for similarity search and clustering tasks. Designed to handle large-scale datasets efficiently, Qdrant employs state-of-the-art approximate nearest neighbor algorithms to rapidly retrieve similar vectors.
+* **HTTP POST, RabbitMQ and Kerberos Vault integration**: ...
+* **MQTT Response**: ...
+
+## DeepFace
+DeepFace is a lightweight face recognition and facial attribute analysis (age, gender, emotion and race) framework for Python. It is a hybrid face recognition framework wrapping state-of-the-art models: VGG-Face, FaceNet, OpenFace, DeepFace, DeepID, ArcFace, Dlib, SFace and GhostFaceNet. More information can be found on [DeepFace's GitHub](https://github.com/serengil/deepface).
+
+## Qdrant
+Qdrant is an enterprise-ready, high-performance, massive-scale Vector Database available as open-source, cloud, and managed on-premise solution. More information can be found on [Qdrant's Github](https://github.com/qdrant/qdrant-client)
+
+### Qdrant and DeepFace Colab ipynb
+Both technologies were combined in the implementation of our FAC application. [An example Python notebook can be found here](https://colab.research.google.com/drive/1G46dqVhfDLoH6xdwmOhY9CetxRlYOrS8?usp=sharing).
diff --git a/ml/facial_recognition.py b/ml/facial_recognition.py
diff --git a/ml/scripts/processor.py b/ml/scripts/processor.py
@@ -0,0 +1,16 @@
+from utils.QueueProcessor import QueueProcessor
+from utils.FacialRecognition import FacialRecognition
+
+def write_video_file(obj_data):
+    """ Writes video data to a file and returns the file name.
+    """
+
+    file_name = './data/video.mp4'
+    with open(file_name, 'wb') as output:
+        output.write(obj_data)
+    return file_name
+
+# Script body: these statements run at module level, in this order.
+# NOTE(review): QueueProcessor is constructed without arguments here; confirm
+# its constructor provides defaults for the queue/storage settings.
+processor = QueueProcessor()
+# Collection "people", in-memory location, 4096-dimensional vectors, cosine distance.
+# NOTE(review): the result is bound to the name `qdrant` but is not used again
+# in this script.
+qdrant = FacialRecognition("people", ":memory:", embedding_size=4096, dist_metric="cosine")
+# Fetch one response from the queue processor.
+resp = processor.process_messages()
+# resp.content is presumably the raw video bytes -- TODO confirm against
+# QueueProcessor.process_messages; it is written to disk as-is.
+video_file_path = write_video_file(resp.content)
diff --git a/ml/scripts/router.py b/ml/scripts/router.py
@@ -0,0 +1,19 @@
+from utils.QueueProcessor import QueueProcessor
+import os
+
+# Configuration is read from environment variables; each value falls back to
+# an empty string when the variable is not set.
+source_queue_name = os.getenv("QUEUE_NAME", "")
+target_queue_name = os.getenv("QUEUE_TARGET", "")
+source_queue_system = os.getenv("QUEUE_SYSTEM", "")
+storage_uri = os.getenv("VAULT_API_URL", "")
+storage_access_key = os.getenv("VAULT_ACCESS_KEY", "")
+storage_secret = os.getenv("VAULT_SECRET_KEY", "")
+
+# All six settings are passed positionally, in this order.
+# NOTE(review): no validation happens here, so unset variables reach
+# QueueProcessor as empty strings -- confirm it handles that.
+processor = QueueProcessor(source_queue_name, target_queue_name, source_queue_system, storage_uri, storage_access_key, storage_secret)
+# Endless polling loop: there is no break or exit condition in this script.
+while True:
+    resp = processor.process_messages()
+
+    # process resp.content
+    # (placeholder: the bare `...` below is a no-op expression statement)
+    ...
+
+    # append to queue
+    # (placeholder: not implemented yet)
+    ...
diff --git a/ml/scripts/utils/FacialRecognition.py b/ml/scripts/utils/FacialRecognition.py
@@ -0,0 +1,80 @@
+from qdrant_client import models, QdrantClient
+from deepface import DeepFace
+import traceback
+
+class FacialRecognition:
+    def __init__(collection_name: str, db_path: str = ":memory:", embedding_size: int = 4096, dist_metric: str = "euclidean", verbose: bool = True) -> QdrantClient:
+        try:
+            qdrant = QdrantClient(db_path)
+            if verbose:
+                print("Initialised QdrantClient with path:", db_path)
+        except Exception as e:
+            traceback.print_exc()
+            print("Failure initialising QdrantClient:", e)
+            pass
+
+        distance = models.Distance.COSINE if dist_metric == "cosine" else models.Distance.EUCLID
+        if verbose:
+            print("Using distance metric: ", distance)
+
+        try:
+            # Create collection to store faces
+            qdrant.recreate_collection(
+                collection_name=collection_name,
+                vectors_config=models.VectorParams(
+                    size=embedding_size, # Vector size is defined by used model
+                    distance=distance
+                )
+            )
+            if verbose:
+                print("successfully created Qdrant collection")
+
+        except Exception as e:
+            traceback.print_exc()
+            print("Failure creating collection:", e)
+            pass
+
+        return qdrant
+
+    def batch_add_embeddings(qdrant: QdrantClient, collection_name: str, data: list[dict]):
+        try:
+            qdrant.upload_records(
+                collection_name=collection_name,
+                records=[
+                    models.Record(
+                        id=data["id"],
+                        # Embedding of the image
+                        vector=DeepFace.represent(img_path = doc["img_path"])[0]["embedding"],
+                        payload=doc
+                    ) for doc in data
+                ]
+            )
+            return True
+
+        except Exception as e:
+            traceback.print_exc()
+            print("Failure adding embeddings:", e)
+
+
+    def embedding_search(qdrant: QdrantClient, collection_name: str, input_embedding, verbose: bool = True):
+        hits = qdrant.search(
+            collection_name="people",
+            query_vector=input,
+            query_filter=models.Filter(
+                must=[
+                    models.FieldCondition(
+                        key="extra_filter",
+                        range=models.Range(
+                            gte=2 # greater than or equal
+                        )
+                    )
+                ]
+            ),
+            limit=1
+        )
+
+        if verbose:
+            for hit in hits:
+                print(hit.payload, "score:", hit.score)
+
+        return hits