Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
GlennDeLoose01 committed May 13, 2024
1 parent 3ff96af commit 3e5a7ee
Show file tree
Hide file tree
Showing 7 changed files with 585 additions and 0 deletions.
18 changes: 18 additions & 0 deletions ml/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Face Recognition and Embeddings Database
This repository contains the back-end ML component of the larger project — the integration of face recognition technology together with a robust vector database.

Within this repository, we present:

* **DeepFace**: representing a breakthrough in face recognition technology. Leveraging deep learning techniques, DeepFace extracts rich, high-dimensional features from facial images, enabling precise identification and verification tasks.
* **Qdrant**: standing out as a robust vector database optimized for similarity search and clustering tasks. Designed to handle large-scale datasets efficiently, Qdrant employs state-of-the-art approximate nearest neighbor algorithms to rapidly retrieve similar vectors.
* **HTTP POST, RabbitMQ and Kerberos Vault integration**: ...
* **MQTT Response**: ...

## DeepFace
DeepFace is a lightweight face recognition and facial attribute analysis (age, gender, emotion and race) framework for Python. It is a hybrid face recognition framework wrapping state-of-the-art models: VGG-Face, FaceNet, OpenFace, DeepFace, DeepID, ArcFace, Dlib, SFace and GhostFaceNet. More information can be found on [DeepFace's GitHub](https://github.com/serengil/deepface).

## Qdrant
Qdrant is an enterprise-ready, high-performance, massive-scale vector database available as an open-source, cloud, or managed on-premise solution. More information can be found on [Qdrant's GitHub](https://github.com/qdrant/qdrant-client).

### Qdrant and DeepFace Colab ipynb
Both technologies were combined in the implementation of our FAC application. [An example Python notebook can be found here](https://colab.research.google.com/drive/1G46dqVhfDLoH6xdwmOhY9CetxRlYOrS8?usp=sharing).
Empty file removed ml/facial_recognition.py
Empty file.
16 changes: 16 additions & 0 deletions ml/scripts/processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from utils.QueueProcessor import QueueProcessor
from utils.FacialRecognition import FacialRecognition

def write_video_file(obj_data, file_name='./data/video.mp4'):
    """Write raw video bytes to disk and return the path that was written.

    Args:
        obj_data: Bytes-like payload to store (the file is opened in binary
            mode, so ``str`` input is rejected by ``write``).
        file_name: Destination path. Defaults to ``./data/video.mp4``, the
            location this function has always used, so existing callers are
            unaffected.

    Returns:
        The value of ``file_name``, unchanged.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    # Local import: this script does not import ``os`` at file level.
    import os

    # ``open(..., 'wb')`` does not create missing directories; without this a
    # fresh checkout with no ``./data`` folder fails with FileNotFoundError.
    parent_dir = os.path.dirname(file_name)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(file_name, 'wb') as output:
        output.write(obj_data)
    return file_name

# Build the queue consumer.
# NOTE(review): constructed with no arguments here, while router.py passes six
# connection settings — confirm QueueProcessor provides defaults for all of them.
processor = QueueProcessor()
# Set up the face store: collection "people", in-memory database path,
# 4096-dimensional vectors compared with the cosine metric.
qdrant = FacialRecognition("people", ":memory:", embedding_size=4096, dist_metric="cosine")
# Fetch from the queue and persist the response's ``content`` as a video file.
# NOTE(review): presumably ``resp.content`` holds the raw video bytes — verify
# against QueueProcessor.process_messages.
resp = processor.process_messages()
video_file_path = write_video_file(resp.content)
19 changes: 19 additions & 0 deletions ml/scripts/router.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from utils.QueueProcessor import QueueProcessor
import os

# Queue settings, read from the environment; each falls back to "".
source_queue_system = os.environ.get("QUEUE_SYSTEM", "")
source_queue_name = os.environ.get("QUEUE_NAME", "")
target_queue_name = os.environ.get("QUEUE_TARGET", "")

# Vault (storage) settings, read the same way.
storage_uri = os.environ.get("VAULT_API_URL", "")
storage_access_key = os.environ.get("VAULT_ACCESS_KEY", "")
storage_secret = os.environ.get("VAULT_SECRET_KEY", "")

processor = QueueProcessor(
    source_queue_name,
    target_queue_name,
    source_queue_system,
    storage_uri,
    storage_access_key,
    storage_secret,
)

# Main loop: runs forever, handling one process_messages() result per pass.
while True:
    resp = processor.process_messages()

    # Placeholder: handle resp.content here.
    ...

    # Placeholder: push the result onto the target queue here.
    ...
80 changes: 80 additions & 0 deletions ml/scripts/utils/FacialRecognition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from qdrant_client import models, QdrantClient
from deepface import DeepFace
import traceback

class FacialRecognition:
    """Face-embedding store built on a Qdrant collection and DeepFace.

    Creating an instance opens a ``QdrantClient`` and (re)creates the named
    collection. The client is exposed as ``self.qdrant`` and the collection
    name as ``self.collection_name``.

    ``batch_add_embeddings`` and ``embedding_search`` are static methods that
    keep their explicit ``(qdrant, collection_name, ...)`` signatures. For
    ``qdrant`` they accept either a raw ``QdrantClient`` or a
    ``FacialRecognition`` instance.
    """

    def __init__(self, collection_name: str, db_path: str = ":memory:", embedding_size: int = 4096, dist_metric: str = "euclidean", verbose: bool = True) -> None:
        """Open the Qdrant client and (re)create the collection.

        Args:
            collection_name: Name of the collection to (re)create.
            db_path: Location passed straight to ``QdrantClient``.
            embedding_size: Dimension of the stored vectors; must match the
                size of the embeddings that will be uploaded.
            dist_metric: ``"cosine"`` selects cosine distance; any other
                value selects Euclidean distance.
            verbose: When true, print progress messages.

        Raises:
            Exception: Whatever ``QdrantClient`` raises when the client
                cannot be created. A failure to create the collection is
                only logged, as before.
        """
        self.collection_name = collection_name

        try:
            self.qdrant = QdrantClient(db_path)
        except Exception as e:
            traceback.print_exc()
            print("Failure initialising QdrantClient:", e)
            # Without a client nothing below can work; surface the real error
            # instead of failing later on an unbound name.
            raise
        if verbose:
            print("Initialised QdrantClient with path:", db_path)

        distance = models.Distance.COSINE if dist_metric == "cosine" else models.Distance.EUCLID
        if verbose:
            print("Using distance metric: ", distance)

        try:
            # Create collection to store faces
            self.qdrant.recreate_collection(
                collection_name=collection_name,
                vectors_config=models.VectorParams(
                    size=embedding_size,  # Vector size is defined by used model
                    distance=distance,
                ),
            )
        except Exception as e:
            # Best effort, as in the original: log and keep the client usable.
            traceback.print_exc()
            print("Failure creating collection:", e)
        else:
            if verbose:
                print("successfully created Qdrant collection")

    @staticmethod
    def _resolve_client(qdrant):
        """Return the underlying client when given a FacialRecognition wrapper."""
        if isinstance(qdrant, FacialRecognition):
            return qdrant.qdrant
        return qdrant

    @staticmethod
    def batch_add_embeddings(qdrant: QdrantClient, collection_name: str, data: list[dict]):
        """Embed each document's image and upload the records in one batch.

        Args:
            qdrant: A ``QdrantClient`` or a ``FacialRecognition`` instance.
            collection_name: Collection that receives the records.
            data: Documents; each must provide ``"id"`` and ``"img_path"``.
                The whole document is stored as the record payload.

        Returns:
            ``True`` when the upload succeeded, ``False`` when any step
            failed (the error is printed, not raised).
        """
        client = FacialRecognition._resolve_client(qdrant)
        try:
            client.upload_records(
                collection_name=collection_name,
                records=[
                    models.Record(
                        # Per-document id (the original indexed the list itself).
                        id=doc["id"],
                        # Embedding of the image: first result returned by DeepFace
                        vector=DeepFace.represent(img_path=doc["img_path"])[0]["embedding"],
                        payload=doc,
                    )
                    for doc in data
                ],
            )
        except Exception as e:
            traceback.print_exc()
            print("Failure adding embeddings:", e)
            return False
        return True

    @staticmethod
    def embedding_search(qdrant: QdrantClient, collection_name: str, input_embedding, verbose: bool = True):
        """Return the closest stored record for ``input_embedding``.

        Args:
            qdrant: A ``QdrantClient`` or a ``FacialRecognition`` instance.
            collection_name: Collection to search.
            input_embedding: Query vector.
            verbose: When true, print each hit's payload and score.

        Returns:
            The hits returned by the client's ``search`` call (at most one).
        """
        client = FacialRecognition._resolve_client(qdrant)
        hits = client.search(
            collection_name=collection_name,
            query_vector=input_embedding,
            # NOTE(review): only records whose payload has "extra_filter" >= 2
            # can match; confirm uploaded documents actually carry this field.
            query_filter=models.Filter(
                must=[
                    models.FieldCondition(
                        key="extra_filter",
                        range=models.Range(
                            gte=2  # greater than or equal
                        ),
                    )
                ]
            ),
            limit=1,
        )

        if verbose:
            for hit in hits:
                print(hit.payload, "score:", hit.score)

        return hits
Loading

0 comments on commit 3e5a7ee

Please sign in to comment.