diff --git a/article-face_verification/.gitignore b/article-face_verification/.gitignore
new file mode 100644
index 0000000..4f7994d
--- /dev/null
+++ b/article-face_verification/.gitignore
@@ -0,0 +1,3 @@
+venv
+__pycache__
+resnet_triplet.pth
diff --git a/article-face_verification/README.md b/article-face_verification/README.md
new file mode 100644
index 0000000..7a25081
--- /dev/null
+++ b/article-face_verification/README.md
@@ -0,0 +1,50 @@
+# Face Verification from Scratch using ResNet-18
+
+Welcome to an article dedicated to Face Verification from Scratch using ResNet-18.
+Buckle up, prepare your coffee, and get ready to dive into one of the most exciting areas of modern computer vision: teaching machines to decide whether two faces belong to the same person.
+
+## Article Summary
+
+This article focuses on building a face verification pipeline using the Labeled Faces in the Wild (LFW) dataset and ResNet-18 as the backbone for feature extraction. By combining triplet sampling with triplet loss, we train the network to generate discriminative embeddings, enabling robust comparison of facial identities.
+
+## Getting Started
+
+Follow these steps to see face verification with ResNet-18 in action!
+
+1. **Create a New Project**: Create a new empty Python project and navigate to the
+   project directory.
+
+   ```sh
+   cd face_verification_project
+   ```
+
+2. **Prepare Your Environment**: Before you begin, make sure you have a virtual
+   environment set up for your project. If not, create and activate one:
+
+   for Linux/Mac:
+
+   ```bash
+   python3 -m venv venv
+   source venv/bin/activate
+   ```
+
+   for Windows:
+
+   ```bash
+   python -m venv venv
+   venv\Scripts\activate
+   ```
+
+3. **Copy the source code**: Inside the empty directory, add the file with the provided
+   source code (`face_verification.py`) and the requirements (`requirements.txt`).
+
+4. **Install Requirements**: Inside your virtual environment, install the required packages from the `requirements.txt` file:
+
+   ```sh
+   pip install -r requirements.txt
+   ```
+
+5. **Run the source code**: To run the provided source code, use the command below.
+   It can take some time, as the LFW dataset needs to be downloaded on the first run.
+
+   ```sh
+   python face_verification.py
+   ```
diff --git a/article-face_verification/article.md b/article-face_verification/article.md
new file mode 100644
index 0000000..d746d72
--- /dev/null
+++ b/article-face_verification/article.md
@@ -0,0 +1,219 @@

+![Face verification from scratch](images/introduction_image.png)
+
+# Face Verification from Scratch
+
+Face verification is a computer vision technique, used most often in biometric authentication, that relies on unique facial features to confirm an individual's identity by comparing a live image of a face against a stored template or document.
+
+In this article, we build a face verification system from scratch, using:
+
+* LFW dataset (Labeled Faces in the Wild)
+* Triplet sampling (anchor, positive, negative)
+* ResNet-18 backbone for embeddings
+* Triplet loss for metric learning
+* Evaluation with ROC, AUC, and EER
+
+## Difference between Face Verification and Face Recognition
+
+Face verification is a 1:1 comparison that authenticates a person's identity by matching their live face against a pre-stored image of themselves, typically with their consent for a specific transaction. Face recognition is a broader 1:many comparison that identifies an individual by searching for a match within a large database of faces. A concrete sketch of the two operations follows the figure below.

+![Face verification vs. face recognition](images/FVerificationVSFRecognition.jpg)
+
+Source: https://learnopencv.com/face-recognition-an-introduction-for-beginners/

+## Setting the environment
+
+```python
+import random
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.utils.data import Dataset, DataLoader
+import torchvision.models as models
+from sklearn.datasets import fetch_lfw_people
+from sklearn.metrics import roc_curve, auc
+```
+
+## LFW dataset
+
+The **LFW dataset** (Labeled Faces in the Wild) is an open-source dataset of face photographs, each labeled with the name of the person pictured; **1,680 people** in the set have two or more distinct photos. We use it to sample triplets of faces for training. According to the Financial Times, LFW was the **most widely** used facial recognition benchmark in the world.
+
+```Python
+class LFWTriplet(Dataset):
+    def __init__(self, resize=0.5, color=True):
+        data = fetch_lfw_people(resize=resize, color=color, funneled=True)
+        self.images = data.images.astype(np.float32)
+        self.targets = data.target
+        self.names = data.target_names
+
+        if not color:
+            # shape (N,H,W) -> (N,1,H,W)
+            self.images = self.images[:, None, :, :] / 255.0
+        else:
+            # shape (N,H,W,3) -> (N,3,H,W)
+            self.images = np.transpose(self.images, (0,3,1,2)) / 255.0
+
+        # Map each identity to the indices of its images, for triplet sampling
+        self.class_to_indices = {}
+        for idx, label in enumerate(self.targets):
+            self.class_to_indices.setdefault(label, []).append(idx)
+
+    def __len__(self):
+        return len(self.images)
+
+    def __getitem__(self, idx):
+        anchor_img = torch.from_numpy(self.images[idx])
+        anchor_label = self.targets[idx]
+
+        # choose positive: same identity, avoiding the anchor itself when possible
+        pos_candidates = [i for i in self.class_to_indices[anchor_label] if i != idx]
+        pos_idx = random.choice(pos_candidates) if pos_candidates else idx
+        positive_img = torch.from_numpy(self.images[pos_idx])
+
+        # choose negative: any other identity
+        neg_label = random.choice([l for l in self.class_to_indices.keys() if l != anchor_label])
+        neg_idx = random.choice(self.class_to_indices[neg_label])
+        negative_img = torch.from_numpy(self.images[neg_idx])
+
+        return anchor_img, positive_img, negative_img
+```
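+
+As a quick sanity check (a sketch; this triggers the LFW download on the first run), we can draw one triplet and inspect the tensor shapes. The 62×47 size assumes the default `resize=0.5`:
+
+```Python
+ds = LFWTriplet(resize=0.5, color=True)
+anchor, positive, negative = ds[0]
+print(len(ds), anchor.shape)  # e.g. 13233 torch.Size([3, 62, 47])
+```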

+## Triplet sampling and triplet loss
+
+Triplet sampling is how you pick (anchor, positive, negative) examples for metric learning with the triplet loss over triplets (A, P, N):
+* A is an "Anchor" image--a picture of a person.
+* P is a "Positive" image--a picture of the same person as the Anchor image.
+* N is a "Negative" image--a picture of a different person than the Anchor image.
+
+**The goal** of triplet sampling is to make the embedding of the anchor closer to the positive than to the negative:
+
+$$
+|| f\left(A^{(i)}\right)-f\left(P^{(i)}\right)||_{2}^{2}+\alpha<|| f\left(A^{(i)}\right)-f\left(N^{(i)}\right)||_{2}^{2}
+$$
+
+You would thus like to minimize the following "triplet cost":
+
+$$\mathcal{J} = \sum^{m}_{i=1} \large[ \small \underbrace{\mid \mid f(A^{(i)}) - f(P^{(i)}) \mid \mid_2^2}_\text{(1)} - \underbrace{\mid \mid f(A^{(i)}) - f(N^{(i)}) \mid \mid_2^2}_\text{(2)} + \alpha \large ] \small_+ \tag{3}$$
+
+Here, the notation "$[z]_+$" is used to denote $max(z,0)$, and $\alpha$ is the margin.

+```Python
+class TripletLoss(nn.Module):
+    def __init__(self, margin=1.0):
+        super().__init__()
+        self.margin = margin
+
+    def forward(self, anchor, positive, negative):
+        # A common variant of the cost above: plain (unsquared) Euclidean distances
+        pos_dist = torch.norm(anchor - positive, dim=1)
+        neg_dist = torch.norm(anchor - negative, dim=1)
+        losses = F.relu(pos_dist - neg_dist + self.margin)
+        return losses.mean()
+```
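+
+A tiny worked example with made-up 2-D embeddings: the positive is closer to the anchor than the negative, but not by the full margin of 1.0, so the loss is non-zero:
+
+```Python
+criterion = TripletLoss(margin=1.0)
+anchor   = torch.tensor([[0.0, 0.0]])
+positive = torch.tensor([[0.3, 0.0]])  # distance 0.3 from the anchor
+negative = torch.tensor([[1.0, 0.0]])  # distance 1.0 from the anchor
+print(criterion(anchor, positive, negative).item())  # relu(0.3 - 1.0 + 1.0) = 0.3
+```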

+## ResNet-18
+
+With the math out of the way, we move on to the pretrained model. ResNet-18 is a convolutional neural network that is 18 layers deep; you can load a version pretrained on more than a million images from the ImageNet database. Its authors, Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, introduced the idea of the **residual block**.
+
+So what is a **residual block**?
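+
+In code, the idea is simply "compute a small transformation, then add the input back in". Here is a minimal sketch, assuming equal input and output channels (the real ResNet blocks also handle stride and channel changes with a projection shortcut); the diagram and equations below describe the same structure:
+
+```Python
+class BasicResidualBlock(nn.Module):
+    def __init__(self, channels):
+        super().__init__()
+        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(channels)
+        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(channels)
+
+    def forward(self, x):
+        out = F.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+        return F.relu(out + x)  # skip connection: add the input back before the final ReLU
+```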

+```
+Input ──► Conv ─► BN ─► ReLU ─► Conv ─► BN ──┐─► ReLU
+  └──────────────────────────────────────────┘
+                skip connection
+```
+
+The **skip connection** (bottom arrow) adds the input directly to the block's output:
+
+$$
+a^{[l+1]} = \text{ReLU}\big(W^{[l+1]} a^{[l]} + b^{[l+1]}\big)
+$$
+
+$$
+a^{[l+2]} = \text{ReLU}\big(W^{[l+2]} a^{[l+1]} + b^{[l+2]} + a^{[l]}\big)
+$$
+
+For our pipeline, we strip ResNet-18's classification head and attach a small linear layer that maps the 512-D backbone features to L2-normalized 128-D embeddings:
+
+```Python
+class ResNetEmbedding(nn.Module):
+    def __init__(self, out_dim=128, in_ch=3):
+        super().__init__()
+        base = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
+        if in_ch == 1:
+            # Grayscale input: swap the first conv for a 1-channel version
+            base.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
+        base.fc = nn.Identity()
+        self.base = base
+        self.fc = nn.Linear(512, out_dim)
+
+    def forward(self, x):
+        x = self.base(x)
+        x = self.fc(x)
+        return F.normalize(x, p=2, dim=1)
+```
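+
+A quick shape check, as a sketch: the 62×47 input matches LFW at `resize=0.5`, and ResNet's global average pooling accepts other sizes too:
+
+```Python
+model = ResNetEmbedding(out_dim=128, in_ch=3)
+model.eval()  # inference mode for the sanity check
+x = torch.randn(4, 3, 62, 47)   # a dummy batch of LFW-sized crops
+z = model(x)
+print(z.shape, z.norm(dim=1))   # torch.Size([4, 128]), each row has unit norm
+```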

+## Training and Evaluation
+
+We train with the Adam optimizer and evaluate with the ROC curve, AUC (Area Under the Curve), and EER (Equal Error Rate), keeping the decision threshold found at the EER point for verification. The EER is the operating point where the false positive rate equals the false negative rate.

+### Train
+
+```Python
+def train(model, loader, optimizer, criterion, device):
+    model.train()
+    running = 0.0
+    for a, p, n in loader:
+        a, p, n = a.to(device), p.to(device), n.to(device)
+        optimizer.zero_grad()
+        za, zp, zn = model(a), model(p), model(n)
+        loss = criterion(za, zp, zn)
+        loss.backward()
+        optimizer.step()
+        running += loss.item() * a.size(0)
+    return running / len(loader.dataset)
+```
+
+### Evaluate
+
+```Python
+@torch.no_grad()
+def evaluate(model, loader, device):
+    model.eval()
+    sims, labels = [], []
+    for a, p, n in loader:
+        a, p, n = a.to(device), p.to(device), n.to(device)
+        za, zp, zn = model(a), model(p), model(n)
+        # positives
+        sims.extend(F.cosine_similarity(za, zp).cpu().numpy())
+        labels.extend([1] * len(za))
+        # negatives
+        sims.extend(F.cosine_similarity(za, zn).cpu().numpy())
+        labels.extend([0] * len(za))
+    sims = np.array(sims)
+    labels = np.array(labels)
+    fpr, tpr, thresh = roc_curve(labels, sims)
+    roc_auc = auc(fpr, tpr)
+
+    # Find the threshold where FPR ~ FNR (the Equal Error Rate point)
+    fnr = 1 - tpr
+    idx = np.nanargmin(np.abs(fnr - fpr))
+    eer = (fpr[idx] + fnr[idx]) / 2.0
+    thr = thresh[idx]
+    return float(roc_auc), float(eer), float(thr)
+```
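+
+Putting the pieces together, a minimal training loop might look like the sketch below. The full pipeline in `face_verification.py` also seeds the RNGs and saves the best checkpoint; note that, as in the source, evaluation here reuses the training loader, so the metrics are optimistic:
+
+```Python
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+loader = DataLoader(LFWTriplet(resize=0.5, color=True), batch_size=64, shuffle=True)
+model = ResNetEmbedding(in_ch=3).to(device)
+criterion = TripletLoss(margin=1.0)
+optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
+
+for epoch in range(1, 6):
+    loss = train(model, loader, optimizer, criterion, device)
+    roc_auc, eer, thr = evaluate(model, loader, device)
+    print(f"Epoch {epoch:02d} | Loss {loss:.4f} | AUC {roc_auc:.4f} | EER {eer:.4f} | Thr {thr:.3f}")
+```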

+![Training loss per epoch](images/plot_loss.png)

+## Run test cases
+
+The script `test_case.py` loads the saved checkpoint and compares pairs of the provided test images using the learned similarity threshold:
+
+```sh
+python test_case.py
+```

+![Test case results](images/test_case_results.png)

+## References
+
+* He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning for Image Recognition. https://arxiv.org/pdf/1512.03385
+* LFW dataset official page: http://vis-www.cs.umass.edu/lfw/
\ No newline at end of file
diff --git a/article-face_verification/images/FVerificationVSFRecognition.jpg b/article-face_verification/images/FVerificationVSFRecognition.jpg
new file mode 100644
index 0000000..977abc3
Binary files /dev/null and b/article-face_verification/images/FVerificationVSFRecognition.jpg differ
diff --git a/article-face_verification/images/introduction_image.png b/article-face_verification/images/introduction_image.png
new file mode 100644
index 0000000..08abd23
Binary files /dev/null and b/article-face_verification/images/introduction_image.png differ
diff --git a/article-face_verification/images/plot_loss.png b/article-face_verification/images/plot_loss.png
new file mode 100644
index 0000000..6550758
Binary files /dev/null and b/article-face_verification/images/plot_loss.png differ
diff --git a/article-face_verification/images/test_case_results.png b/article-face_verification/images/test_case_results.png
new file mode 100644
index 0000000..2e3d562
Binary files /dev/null and b/article-face_verification/images/test_case_results.png differ
diff --git a/article-face_verification/images/test_image1.jpg b/article-face_verification/images/test_image1.jpg
new file mode 100644
index 0000000..af6a103
Binary files /dev/null and b/article-face_verification/images/test_image1.jpg differ
diff --git a/article-face_verification/images/test_image2.jpg b/article-face_verification/images/test_image2.jpg
new file mode 100644
index 0000000..ff46d61
Binary files /dev/null and b/article-face_verification/images/test_image2.jpg differ
diff --git a/article-face_verification/images/test_image3.jpg b/article-face_verification/images/test_image3.jpg
new file mode 100644
index 0000000..773dd5a
Binary files /dev/null and b/article-face_verification/images/test_image3.jpg differ
diff --git a/article-face_verification/src/face_verification.py b/article-face_verification/src/face_verification.py
new file mode 100644
index 0000000..7005583
--- /dev/null
+++ b/article-face_verification/src/face_verification.py
@@ -0,0 +1,206 @@
+import random
+import numpy as np
+import matplotlib.pyplot as plt
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.utils.data import Dataset, DataLoader
+import torchvision.models as models
+from sklearn.datasets import fetch_lfw_people
+from sklearn.metrics import roc_curve, auc
+
+
+# 1) Dataset: LFW Triplets (Anchor, Positive, Negative)
+
+class LFWTriplet(Dataset):
+    """
+    Dataset wrapper for LFW that generates triplets:
+    Anchor, Positive (same person), Negative (different person).
+ """ + def __init__(self, resize=0.5, color=True): + data = fetch_lfw_people(resize=resize, color=color, funneled=True) + self.images = data.images.astype(np.float32) + self.targets = data.target + self.names = data.target_names + + # Normalize and reshape + if not color: + self.images = self.images[:, None, :, :] / 255.0 + else: + self.images = np.transpose(self.images, (0, 3, 1, 2)) / 255.0 + + # Index map for sampling positives/negatives + self.class_to_indices = {} + for idx, label in enumerate(self.targets): + self.class_to_indices.setdefault(label, []).append(idx) + + def __len__(self): + return len(self.images) + + def __getitem__(self, idx): + anchor_img = torch.from_numpy(self.images[idx]) + anchor_label = self.targets[idx] + + # Select a positive sample (same person) + pos_idx = random.choice(self.class_to_indices[anchor_label]) + positive_img = torch.from_numpy(self.images[pos_idx]) + + # Select a negative sample (different person) + neg_label = random.choice([l for l in self.class_to_indices.keys() if l != anchor_label]) + neg_idx = random.choice(self.class_to_indices[neg_label]) + negative_img = torch.from_numpy(self.images[neg_idx]) + + return anchor_img, positive_img, negative_img + + +# 2) Model: ResNet-18 Embedding + +class ResNetEmbedding(nn.Module): + """ + ResNet-18 backbone modified for face embeddings. + Outputs a normalized 128-D vector for each input face. + """ + def __init__(self, out_dim=128, in_ch=3): + super().__init__() + base = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1) + if in_ch == 1: + base.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False) + base.fc = nn.Identity() + self.base = base + self.fc = nn.Linear(512, out_dim) + + def forward(self, x): + x = self.base(x) + x = self.fc(x) + return F.normalize(x, p=2, dim=1) + + +# 3) Loss: Triplet Loss + +class TripletLoss(nn.Module): + """ + Triplet loss: enforces anchor-positive pairs closer + than anchor-negative pairs by a margin. + """ + def __init__(self, margin=1.0): + super().__init__() + self.margin = margin + + def forward(self, anchor, positive, negative): + pos_dist = torch.norm(anchor - positive, dim=1) + neg_dist = torch.norm(anchor - negative, dim=1) + losses = F.relu(pos_dist - neg_dist + self.margin) + return losses.mean() + + +# 4) Training / Evaluation Utilities + +def train_epoch(model, loader, optimizer, criterion, device): + """ + Train the model for one epoch. + """ + model.train() + running_loss = 0.0 + for a, p, n in loader: + a, p, n = a.to(device), p.to(device), n.to(device) + optimizer.zero_grad() + za, zp, zn = model(a), model(p), model(n) + loss = criterion(za, zp, zn) + loss.backward() + optimizer.step() + running_loss += loss.item() * a.size(0) + return running_loss / len(loader.dataset) + + +@torch.no_grad() +def evaluate(model, loader, device): + """ + Evaluate model using ROC-AUC and Equal Error Rate (EER). 
+ """ + model.eval() + sims, labels = [], [] + for a, p, n in loader: + a, p, n = a.to(device), p.to(device), n.to(device) + za, zp, zn = model(a), model(p), model(n) + + # Positive pairs (same person) + sims.extend(F.cosine_similarity(za, zp).cpu().numpy()) + labels.extend([1] * len(za)) + + # Negative pairs (different person) + sims.extend(F.cosine_similarity(za, zn).cpu().numpy()) + labels.extend([0] * len(za)) + + sims = np.array(sims) + labels = np.array(labels) + + fpr, tpr, thresh = roc_curve(labels, sims) + roc_auc = auc(fpr, tpr) + + fnr = 1 - tpr + idx = np.nanargmin(np.abs(fnr - fpr)) + eer = (fpr[idx] + fnr[idx]) / 2.0 + thr = thresh[idx] + return float(roc_auc), float(eer), float(thr) + + +def plot_losses(train_losses, title="Training Loss"): + """ + Plot training loss curve. + """ + plt.figure(figsize=(8, 5)) + plt.plot(train_losses, marker='o') + plt.xlabel("Epoch") + plt.ylabel("Loss") + plt.title(title) + plt.grid(True) + plt.show() + + +# 5) Main Training Loop + +def main(epochs=5, batch_size=64, lr=1e-3, resize=0.5, color=True, margin=1.0, out_file="resnet_triplet.pth"): + """ + Main training pipeline for ResNet-based Face Verification. + """ + torch.manual_seed(42) + np.random.seed(42) + random.seed(42) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # Dataset & Loader + train_ds = LFWTriplet(resize=resize, color=color) + train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True) + + in_ch = 3 if color else 1 + model = ResNetEmbedding(in_ch=in_ch).to(device) + criterion = TripletLoss(margin=margin) + optimizer = torch.optim.Adam(model.parameters(), lr=lr) + + best_auc, best_thr = 0.0, None + train_losses = [] + + for epoch in range(1, epochs + 1): + train_loss = train_epoch(model, train_loader, optimizer, criterion, device) + auc_val, eer, thr = evaluate(model, train_loader, device) + + train_losses.append(train_loss) + + print(f"Epoch {epoch:02d} | Loss {train_loss:.4f} | AUC {auc_val:.4f} | EER {eer:.4f} | Thr {thr:.3f}") + + if auc_val > best_auc: + best_auc, best_thr = auc_val, thr + torch.save({ + "model": model.state_dict(), + "in_ch": in_ch, + "thr": thr + }, out_file) + + print(f"Best model saved to {out_file} with AUC={best_auc:.4f}, Thr={best_thr:.3f}") + plot_losses(train_losses) + + +if __name__ == "__main__": + main() diff --git a/article-face_verification/src/requirements.txt b/article-face_verification/src/requirements.txt new file mode 100644 index 0000000..2cbe11d --- /dev/null +++ b/article-face_verification/src/requirements.txt @@ -0,0 +1,41 @@ +contourpy==1.3.3 +cycler==0.12.1 +filelock==3.19.1 +fonttools==4.59.2 +fsspec==2025.9.0 +Jinja2==3.1.6 +joblib==1.5.2 +kiwisolver==1.4.9 +MarkupSafe==3.0.2 +matplotlib==3.10.6 +mpmath==1.3.0 +networkx==3.5 +numpy==2.3.2 +nvidia-cublas-cu12==12.8.4.1 +nvidia-cuda-cupti-cu12==12.8.90 +nvidia-cuda-nvrtc-cu12==12.8.93 +nvidia-cuda-runtime-cu12==12.8.90 +nvidia-cudnn-cu12==9.10.2.21 +nvidia-cufft-cu12==11.3.3.83 +nvidia-cufile-cu12==1.13.1.3 +nvidia-curand-cu12==10.3.9.90 +nvidia-cusolver-cu12==11.7.3.90 +nvidia-cusparse-cu12==12.5.8.93 +nvidia-cusparselt-cu12==0.7.1 +nvidia-nccl-cu12==2.27.3 +nvidia-nvjitlink-cu12==12.8.93 +nvidia-nvtx-cu12==12.8.90 +packaging==25.0 +pillow==11.3.0 +pyparsing==3.2.3 +python-dateutil==2.9.0.post0 +scikit-learn==1.7.1 +scipy==1.16.1 +setuptools==80.9.0 +six==1.17.0 +sympy==1.14.0 +threadpoolctl==3.6.0 +torch==2.8.0 +torchvision==0.23.0 +triton==3.4.0 +typing_extensions==4.15.0 diff --git a/article-face_verification/src/test_case.py 
new file mode 100644
index 0000000..9cb7099
--- /dev/null
+++ b/article-face_verification/src/test_case.py
@@ -0,0 +1,46 @@
+import torch
+from PIL import Image
+import torchvision.transforms as T
+from face_verification import ResNetEmbedding
+
+# Load checkpoint
+ckpt = torch.load("resnet_triplet.pth", map_location="cpu")
+
+# Build model with correct channels
+in_ch = ckpt.get("in_ch", 3)
+model = ResNetEmbedding(in_ch=in_ch)
+model.load_state_dict(ckpt["model"])
+model.eval()
+
+# Threshold learned during training
+sim_threshold = ckpt.get("thr", 0.5)
+
+# Preprocess: match training setup
+transform = T.Compose([
+    T.Resize((62, 47)),  # match LFW training size (125x94 crop halved by resize=0.5)
+    T.Grayscale() if in_ch == 1 else T.Lambda(lambda x: x),  # grayscale if needed
+    T.ToTensor()
+])
+
+def load_face(path):
+    img = Image.open(path).convert("RGB")
+    return transform(img).unsqueeze(0)  # (1,C,H,W)
+
+def verify(img1_path, img2_path):
+    x1, x2 = load_face(img1_path), load_face(img2_path)
+    with torch.no_grad():
+        z1, z2 = model(x1), model(x2)
+        sim = torch.nn.functional.cosine_similarity(z1, z2).item()
+    same = sim >= sim_threshold
+    return same, sim, sim_threshold
+
+# Example test cases
+image1 = "../images/test_image1.jpg"
+image2 = "../images/test_image2.jpg"
+image3 = "../images/test_image3.jpg"
+
+same, sim, thr = verify(image1, image2)
+print(f"Test case 1: Same person? {same} | sim={sim:.3f} | threshold={thr:.3f}")
+
+same, sim, thr = verify(image1, image3)
+print(f"Test case 2: Same person? {same} | sim={sim:.3f} | threshold={thr:.3f}")
\ No newline at end of file