From 28485164703efb9d9f0d92786109fea5afe9deb4 Mon Sep 17 00:00:00 2001
From: mk-minchul
Date: Wed, 8 Jun 2022 13:49:41 +0000
Subject: [PATCH] adding tinyface eval script

---
 validation_lq/README.md            |  16 +++
 validation_lq/data_utils.py        |  53 ++++++++
 validation_lq/tinyface_helper.py   | 208 +++++++++++++++++++++++++++++
 validation_lq/validate_tinyface.py | 161 ++++++++++++++++++++++
 4 files changed, 438 insertions(+)
 create mode 100644 validation_lq/tinyface_helper.py
 create mode 100644 validation_lq/validate_tinyface.py

diff --git a/validation_lq/README.md b/validation_lq/README.md
index 8029aba..0fb6199 100644
--- a/validation_lq/README.md
+++ b/validation_lq/README.md
@@ -30,4 +30,20 @@ EX)
 4. Run
 ```
 python validate_IJB_S.py --data_root $DATA_ROOT --model_name ir50
+```
+
+
+# TinyFace Evaluation
+
+1. Download the evaluation split from https://qmul-tinyface.github.io/
+    1. Extract the zip file under `$DATA_ROOT`.
+2. TinyFace evaluation images have to be aligned and resized.
+    1. You may perform the alignment yourself with MTCNN, or download the pre-aligned images by completing this form: [link](https://forms.gle/Mz1LNrQwn1Bwjvo86).
+    2. Do not re-distribute the aligned data. It is released to encourage reproducibility of research,
+    but if it infringes the copyright of the original TinyFace dataset, the aligned version may be retracted.
+    3. Extract the zip file under `$DATA_ROOT`.
+3. You may run the evaluation with the example command below.
+
+```
+python validate_tinyface.py --data_root $DATA_ROOT --model_name ir101_webface4m
 ```
\ No newline at end of file
diff --git a/validation_lq/data_utils.py b/validation_lq/data_utils.py
index 52e331f..2f56140 100644
--- a/validation_lq/data_utils.py
+++ b/validation_lq/data_utils.py
@@ -43,6 +43,42 @@ def __getitem__(self, idx):
         img = self.transform(img)
         return img, idx
 
+
+class ListDataset(Dataset):
+    def __init__(self, img_list, image_is_saved_with_swapped_B_and_R=False):
+        super(ListDataset, self).__init__()
+
+        # image_is_saved_with_swapped_B_and_R: correctly saved image should have this set to False
+        # face_emore/img has images saved with B and G (of RGB) swapped.
+        # Since training data loader uses PIL (results in RGB) to read image
+        # and validation data loader uses cv2 (results in BGR) to read image, this swap was okay.
+        # But if you want to evaluate on the training data such as face_emore/img (B and G swapped),
+        # then you should set image_is_saved_with_swapped_B_and_R=True
+
+        self.img_list = img_list
+        self.transform = transforms.Compose(
+            [transforms.ToTensor(), transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
+
+        self.image_is_saved_with_swapped_B_and_R = image_is_saved_with_swapped_B_and_R
+
+
+    def __len__(self):
+        return len(self.img_list)
+
+    def __getitem__(self, idx):
+        image_path = self.img_list[idx]
+        img = cv2.imread(image_path)
+        img = img[:, :, :3]
+
+        if self.image_is_saved_with_swapped_B_and_R:
+            print('check if it really should be on')
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+        img = Image.fromarray(img)
+        img = self.transform(img)
+        return img, idx
+
+
 def prepare_imagelist_dataloader(img_list, batch_size, num_workers=0, image_is_saved_with_swapped_B_and_R=False):
     # image_is_saved_with_swapped_B_and_R: correctly saved image should have this set to False
     # face_emore/img has images saved with B and G (of RGB) swapped.
@@ -53,4 +89,21 @@ def prepare_imagelist_dataloader(img_list, batch_size, num_workers=0, image_is_s
     image_dataset = ListDatasetWithIndex(img_list, image_is_saved_with_swapped_B_and_R)
     dataloader = DataLoader(image_dataset, batch_size=batch_size, shuffle=False, drop_last=False, num_workers=num_workers)
+    return dataloader
+
+
+def prepare_dataloader(img_list, batch_size, num_workers=0, image_is_saved_with_swapped_B_and_R=False):
+    # image_is_saved_with_swapped_B_and_R: correctly saved image should have this set to False
+    # face_emore/img has images saved with B and G (of RGB) swapped.
+    # Since training data loader uses PIL (results in RGB) to read image
+    # and validation data loader uses cv2 (results in BGR) to read image, this swap was okay.
+    # But if you want to evaluate on the training data such as face_emore/img (B and G swapped),
+    # then you should set image_is_saved_with_swapped_B_and_R=True
+
+    image_dataset = ListDataset(img_list, image_is_saved_with_swapped_B_and_R=image_is_saved_with_swapped_B_and_R)
+    dataloader = DataLoader(image_dataset,
+                            batch_size=batch_size,
+                            shuffle=False,
+                            drop_last=False,
+                            num_workers=num_workers)
     return dataloader
\ No newline at end of file
diff --git a/validation_lq/tinyface_helper.py b/validation_lq/tinyface_helper.py
new file mode 100644
index 0000000..fbf12d5
--- /dev/null
+++ b/validation_lq/tinyface_helper.py
@@ -0,0 +1,208 @@
+import numpy as np
+import scipy.io as sio
+import os
+import scipy
+
+def get_all_files(root, extension_list=['.jpg', '.png', '.jpeg']):
+
+    all_files = list()
+    for (dirpath, dirnames, filenames) in os.walk(root):
+        all_files += [os.path.join(dirpath, file) for file in filenames]
+    if extension_list is None:
+        return all_files
+    all_files = list(filter(lambda x: os.path.splitext(x)[1] in extension_list, all_files))
+    return all_files
+
+
+class TinyFaceTest:
+    def __init__(self, tinyface_root='/data/data/faces/tinyface', alignment_dir_name='aligned_pad_0.1_pad_high'):
+
+        self.tinyface_root = tinyface_root
+        # as defined by tinyface protocol
+        self.gallery_dict = scipy.io.loadmat(os.path.join(tinyface_root, 'tinyface/Testing_Set/gallery_match_img_ID_pairs.mat'))
+        self.probe_dict = scipy.io.loadmat(os.path.join(tinyface_root, 'tinyface/Testing_Set/probe_img_ID_pairs.mat'))
+        self.proto_gal_paths = [os.path.join(tinyface_root, alignment_dir_name, 'Gallery_Match', p[0].item()) for p in self.gallery_dict['gallery_set']]
+        self.proto_prob_paths = [os.path.join(tinyface_root, alignment_dir_name, 'Probe', p[0].item()) for p in self.probe_dict['probe_set']]
+        self.proto_distractor_paths = get_all_files(os.path.join(tinyface_root, alignment_dir_name, 'Gallery_Distractor'))
+
+        self.image_paths = get_all_files(os.path.join(tinyface_root, alignment_dir_name))
+        self.image_paths = np.array(self.image_paths).astype(object).flatten()
+
+        self.probe_paths = get_all_files(os.path.join(tinyface_root, 'tinyface/Testing_Set/Probe'))
+        self.probe_paths = np.array(self.probe_paths).astype(object).flatten()
+
+        self.gallery_paths = get_all_files(os.path.join(tinyface_root, 'tinyface/Testing_Set/Gallery_Match'))
+        self.gallery_paths = np.array(self.gallery_paths).astype(object).flatten()
+
+        self.distractor_paths = get_all_files(os.path.join(tinyface_root, 'tinyface/Testing_Set/Gallery_Distractor'))
+        self.distractor_paths = np.array(self.distractor_paths).astype(object).flatten()
+
+        self.init_proto(self.probe_paths, self.gallery_paths, self.distractor_paths)
+
+    def get_key(self, image_path):
+        return os.path.splitext(os.path.basename(image_path))[0]
+
+    def get_label(self, image_path):
+        return int(os.path.basename(image_path).split('_')[0])
+
+    def init_proto(self, probe_paths, match_paths, distractor_paths):
+        index_dict = {}
+        for i, image_path in enumerate(self.image_paths):
+            index_dict[self.get_key(image_path)] = i
+
+        self.indices_probe = np.array([index_dict[self.get_key(img)] for img in probe_paths])
+        self.indices_match = np.array([index_dict[self.get_key(img)] for img in match_paths])
+        self.indices_distractor = np.array([index_dict[self.get_key(img)] for img in distractor_paths])
+
+        self.labels_probe = np.array([self.get_label(img) for img in probe_paths])
+        self.labels_match = np.array([self.get_label(img) for img in match_paths])
+        self.labels_distractor = np.array([-100 for img in distractor_paths])
+
+        self.indices_gallery = np.concatenate([self.indices_match, self.indices_distractor])
+        self.labels_gallery = np.concatenate([self.labels_match, self.labels_distractor])
+
+
+    def test_identification(self, features, ranks=[1,5,20]):
+        feat_probe = features[self.indices_probe]
+        feat_gallery = features[self.indices_gallery]
+        compare_func = inner_product
+        score_mat = compare_func(feat_probe, feat_gallery)
+
+        label_mat = self.labels_probe[:,None] == self.labels_gallery[None,:]
+
+        results, _, __ = DIR_FAR(score_mat, label_mat, ranks)
+
+        return results
+
+def inner_product(x1, x2):
+    x1, x2 = np.array(x1), np.array(x2)
+    if x1.ndim == 3:
+        raise ValueError('expected 2D feature matrices, got 3D input')
+        x1, x2 = x1[:,:,0], x2[:,:,0]
+    return np.dot(x1, x2.T)
+
+
+def DIR_FAR(score_mat, label_mat, ranks=[1], FARs=[1.0], get_false_indices=False):
+    '''
+    Code borrowed from https://github.com/seasonSH/Probabilistic-Face-Embeddings
+
+    Closed/Open-set Identification.
+    A general case of Cumulative Match Characteristic (CMC)
+    where thresholding is allowed for open-set identification.
+    args:
+        score_mat: a P x G matrix, P is number of probes, G is size of gallery
+        label_mat: a P x G matrix, bool
+        ranks: a list of integers
+        FARs: false alarm rates, if 1.0, closed-set identification (CMC)
+        get_false_indices: if True, also return the indices of false retrievals, rejections and acceptances
+    return:
+        DIRs: an F x R matrix, F is the number of FARs, R is the number of ranks,
+            flattened into a vector if F=1 or R=1.
+        FARs: a vector of length F.
+        thresholds: a vector of length F.
+    '''
+    assert score_mat.shape==label_mat.shape
+    # assert np.all(label_mat.astype(np.float32).sum(axis=1) <=1 )
+    # Split the matrix for match probes and non-match probes
+    # subfix _m: match, _nm: non-match
+    # For closed set, we only use the match probes
+    match_indices = label_mat.astype(bool).any(axis=1)
+    score_mat_m = score_mat[match_indices,:]
+    label_mat_m = label_mat[match_indices,:]
+    score_mat_nm = score_mat[np.logical_not(match_indices),:]
+    label_mat_nm = label_mat[np.logical_not(match_indices),:]
+
+    print('mate probes: %d, non mate probes: %d' % (score_mat_m.shape[0], score_mat_nm.shape[0]))
+
+    # Find the thresholds for different FARs
+    max_score_nm = np.max(score_mat_nm, axis=1)
+    label_temp = np.zeros(max_score_nm.shape, dtype=bool)
+    if len(FARs) == 1 and FARs[0] >= 1.0:
+        # If only testing closed-set identification, use the minimum score as threshold
+        # in case there is no non-mate probes
+        thresholds = [np.min(score_mat) - 1e-10]
+        openset = False
+    else:
+        # If there is open-set identification, find the thresholds by FARs.
+        assert score_mat_nm.shape[0] > 0, "For open-set identification (FAR<1.0), there should be at least one non-mate probe!"
+        thresholds = find_thresholds_by_FAR(max_score_nm, label_temp, FARs=FARs)
+        openset = True
+
+    # Sort the labels row by row according to scores
+    sort_idx_mat_m = np.argsort(score_mat_m, axis=1)
+    sorted_label_mat_m = np.ndarray(label_mat_m.shape, dtype=bool)
+    for row in range(label_mat_m.shape[0]):
+        sort_idx = (sort_idx_mat_m[row, :])[::-1]
+        sorted_label_mat_m[row,:] = label_mat_m[row, sort_idx]
+
+    # Calculate DIRs for different FARs and ranks
+    if openset:
+        gt_score_m = score_mat_m[label_mat_m]
+        assert gt_score_m.size == score_mat_m.shape[0]
+
+    DIRs = np.zeros([len(FARs), len(ranks)], dtype=np.float32)
+    FARs = np.zeros([len(FARs)], dtype=np.float32)
+    if get_false_indices:
+        false_retrieval = np.zeros([len(FARs), len(ranks), score_mat_m.shape[0]], dtype=bool)
+        false_reject = np.zeros([len(FARs), len(ranks), score_mat_m.shape[0]], dtype=bool)
+        false_accept = np.zeros([len(FARs), len(ranks), score_mat_nm.shape[0]], dtype=bool)
+    for i, threshold in enumerate(thresholds):
+        for j, rank in enumerate(ranks):
+            success_retrieval = sorted_label_mat_m[:,0:rank].any(axis=1)
+            if openset:
+                success_threshold = gt_score_m >= threshold
+                DIRs[i,j] = (success_threshold & success_retrieval).astype(np.float32).mean()
+            else:
+                DIRs[i,j] = success_retrieval.astype(np.float32).mean()
+            if get_false_indices:
+                false_retrieval[i,j] = ~success_retrieval
+                false_accept[i,j] = score_mat_nm.max(1) >= threshold
+                if openset:
+                    false_reject[i,j] = ~success_threshold
+        if score_mat_nm.shape[0] > 0:
+            FARs[i] = (max_score_nm >= threshold).astype(np.float32).mean()
+
+    if DIRs.shape[0] == 1 or DIRs.shape[1] == 1:
+        DIRs = DIRs.flatten()
+
+    if get_false_indices:
+        return DIRs, FARs, thresholds, match_indices, false_retrieval, false_reject, false_accept, sort_idx_mat_m
+    else:
+        return DIRs, FARs, thresholds
+
+
+# Find thresholds given FARs
+# but the real FARs using these thresholds could be different
+# the exact FARs need to be recomputed using calcROC
+def find_thresholds_by_FAR(score_vec, label_vec, FARs=None, epsilon=1e-5):
+    # Code borrowed from https://github.com/seasonSH/Probabilistic-Face-Embeddings
+
+    assert len(score_vec.shape)==1
+    assert score_vec.shape == label_vec.shape
+    assert label_vec.dtype == bool
+    score_neg = score_vec[~label_vec]
+    score_neg[::-1].sort()
+    # score_neg = np.sort(score_neg)[::-1] # score from high to low
+    num_neg = len(score_neg)
+
+    assert num_neg >= 1
+
+    if FARs is None:
+        thresholds = np.unique(score_neg)
+        thresholds = np.insert(thresholds, 0, thresholds[0]+epsilon)
+        thresholds = np.insert(thresholds, thresholds.size, thresholds[-1]-epsilon)
+    else:
+        FARs = np.array(FARs)
+        num_false_alarms = np.round(num_neg * FARs).astype(np.int32)
+
+        thresholds = []
+        for num_false_alarm in num_false_alarms:
+            if num_false_alarm==0:
+                threshold = score_neg[0] + epsilon
+            else:
+                threshold = score_neg[num_false_alarm-1]
+            thresholds.append(threshold)
+        thresholds = np.array(thresholds)
+
+    return thresholds
diff --git a/validation_lq/validate_tinyface.py b/validation_lq/validate_tinyface.py
new file mode 100644
index 0000000..1a3bc13
--- /dev/null
+++ b/validation_lq/validate_tinyface.py
@@ -0,0 +1,161 @@
+import torch
+import numpy as np
+from tqdm import tqdm
+import data_utils
+import argparse
+import pandas as pd
+import tinyface_helper
+import sys, os
+sys.path.insert(0, os.path.dirname(os.getcwd()))
+import net
+
+
+def str2bool(v):
+    if v.lower() in ('yes', 'true', 't', 'y', '1'):
+        return True
+    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
+        return False
+    else:
+        raise argparse.ArgumentTypeError('Boolean value expected.')
+
+def l2_norm(input, axis=1):
+    """l2 normalize
+    """
+    norm = torch.norm(input, 2, axis, True)
+    output = torch.div(input, norm)
+    return output, norm
+
+
+def fuse_features_with_norm(stacked_embeddings, stacked_norms, fusion_method='norm_weighted_avg'):
+
+    assert stacked_embeddings.ndim == 3  # (n_features_to_fuse, batch_size, channel)
+    if stacked_norms is not None:
+        assert stacked_norms.ndim == 3  # (n_features_to_fuse, batch_size, 1)
+    else:
+        assert fusion_method not in ['norm_weighted_avg', 'pre_norm_vector_add']
+
+    if fusion_method == 'norm_weighted_avg':
+        weights = stacked_norms / stacked_norms.sum(dim=0, keepdim=True)
+        fused = (stacked_embeddings * weights).sum(dim=0)
+        fused, _ = l2_norm(fused, axis=1)
+        fused_norm = stacked_norms.mean(dim=0)
+    elif fusion_method == 'pre_norm_vector_add':
+        pre_norm_embeddings = stacked_embeddings * stacked_norms
+        fused = pre_norm_embeddings.sum(dim=0)
+        fused, fused_norm = l2_norm(fused, axis=1)
+    elif fusion_method == 'average':
+        fused = stacked_embeddings.sum(dim=0)
+        fused, _ = l2_norm(fused, axis=1)
+        if stacked_norms is None:
+            fused_norm = torch.ones((len(fused), 1))
+        else:
+            fused_norm = stacked_norms.mean(dim=0)
+    elif fusion_method == 'concat':
+        fused = torch.cat([stacked_embeddings[0], stacked_embeddings[1]], dim=-1)
+        if stacked_norms is None:
+            fused_norm = torch.ones((len(fused), 1))
+        else:
+            fused_norm = stacked_norms.mean(dim=0)
+    elif fusion_method == 'faceness_score':
+        raise ValueError('not implemented yet. please refer to https://github.com/deepinsight/insightface/blob/5d3be6da49275602101ad122601b761e36a66a01/recognition/_evaluation_/ijb/ijb_11.py#L296')
+        # note that they do not use normalization afterward.
+    else:
+        raise ValueError('not a correct fusion method', fusion_method)
+
+    return fused, fused_norm
+
+
+def infer(model, dataloader, use_flip_test, fusion_method):
+    model.eval()
+    device = next(model.parameters()).device  # run on whichever device the model was moved to (set via --gpu)
+    features = []
+    norms = []
+    with torch.no_grad():
+        for images, idx in tqdm(dataloader):
+
+            feature = model(images.to(device))
+            if isinstance(feature, tuple):
+                feature, norm = feature
+            else:
+                norm = None
+
+            if use_flip_test:
+                flipped_images = torch.flip(images, dims=[3])
+                flipped_feature = model(flipped_images.to(device))
+                if isinstance(flipped_feature, tuple):
+                    flipped_feature, flipped_norm = flipped_feature
+                else:
+                    flipped_norm = None
+
+                stacked_embeddings = torch.stack([feature, flipped_feature], dim=0)
+                if norm is not None:
+                    stacked_norms = torch.stack([norm, flipped_norm], dim=0)
+                else:
+                    stacked_norms = None
+
+                fused_feature, fused_norm = fuse_features_with_norm(stacked_embeddings, stacked_norms, fusion_method=fusion_method)
+                features.append(fused_feature.cpu().numpy())
+                norms.append(fused_norm.cpu().numpy())
+            else:
+                features.append(feature.cpu().numpy())
+                norms.append(norm.cpu().numpy())
+
+    features = np.concatenate(features, axis=0)
+    norms = np.concatenate(norms, axis=0)
+    return features, norms
+
+def load_pretrained_model(model_name='ir50'):
+    # load model and pretrained statedict
+    ckpt_path = adaface_models[model_name][0]
+    arch = adaface_models[model_name][1]
+
+    model = net.build_model(arch)
+    statedict = torch.load(ckpt_path)['state_dict']
+    model_statedict = {key[6:]:val for key, val in statedict.items() if key.startswith('model.')}
+    model.load_state_dict(model_statedict)
+    model.eval()
+    return model
+
+
+if __name__ == '__main__':
+
+    parser = argparse.ArgumentParser(description='tinyface')
+
+    parser.add_argument('--data_root', default='/data/data/faces/tinyface_root')
+    parser.add_argument('--gpu', default=0, type=int, help='gpu id')
+    parser.add_argument('--batch_size', default=32, type=int, help='')
+    parser.add_argument('--model_name', type=str, default='ir101_webface4m')
+    parser.add_argument('--use_flip_test', type=str2bool, default='True')
+    parser.add_argument('--fusion_method', type=str, default='pre_norm_vector_add', choices=('average', 'norm_weighted_avg', 'pre_norm_vector_add', 'concat', 'faceness_score'))
+    args = parser.parse_args()
+
+    # load model
+    adaface_models = {
+        'ir50': ["../pretrained/adaface_ir50_ms1mv2.ckpt", 'ir_50'],
+        'ir101_ms1mv2': ["../pretrained/adaface_ir101_ms1mv2.ckpt", 'ir_101'],
+        'ir101_ms1mv3': ["../pretrained/adaface_ir101_ms1mv3.ckpt", 'ir_101'],
+        'ir101_webface4m': ["../pretrained/adaface_ir101_webface4m.ckpt", 'ir_101'],
+        'ir101_webface12m': ["../pretrained/adaface_ir101_webface12m.ckpt", 'ir_101'],
+    }
+    assert args.model_name in adaface_models
+    # load model
+    model = load_pretrained_model(args.model_name)
+    model.to('cuda:{}'.format(args.gpu))
+
+    tinyface_test = tinyface_helper.TinyFaceTest(tinyface_root=args.data_root,
+                                                 alignment_dir_name='aligned_pad_0.1_pad_high')
+
+    # set save root
+    save_path = os.path.join('./tinyface_result', args.model_name, "fusion_{}".format(args.fusion_method))
+
+    if not os.path.exists(save_path):
+        os.makedirs(save_path)
+    print('save_path: {}'.format(save_path))
+
+    img_paths = tinyface_test.image_paths
+    print('total images : {}'.format(len(img_paths)))
+    dataloader = data_utils.prepare_dataloader(img_paths, args.batch_size, num_workers=0)
+    features, norms = infer(model, dataloader, use_flip_test=args.use_flip_test, fusion_method=args.fusion_method)
+    results = tinyface_test.test_identification(features, ranks=[1,5,20])
+    print(results)
+    pd.DataFrame({'rank':[1,5,20], 'values':results}).to_csv(os.path.join(save_path, 'result.csv'))
\ No newline at end of file
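
For reference (not part of the patch itself), below is a minimal sketch of what the closed-set branch of `DIR_FAR` in `tinyface_helper.py` computes. The toy `score_mat`/`label_mat` values are made up for illustration, and the snippet assumes `tinyface_helper.py` from this patch is on the import path; with `FARs=[1.0]` the function reduces to a plain CMC, i.e. rank-k identification rates.

```
import numpy as np
from tinyface_helper import DIR_FAR

# Toy protocol: 3 mated probes scored against a 4-image gallery.
# Each row of label_mat marks which gallery entry shares the probe's identity.
score_mat = np.array([[0.9, 0.1, 0.2, 0.3],
                      [0.2, 0.8, 0.1, 0.7],
                      [0.3, 0.4, 0.6, 0.5]])
label_mat = np.array([[True,  False, False, False],
                      [False, False, True,  False],
                      [False, True,  False, False]])

# FARs=[1.0] selects the closed-set branch, so DIRs are plain rank-k retrieval rates.
DIRs, FARs, thresholds = DIR_FAR(score_mat, label_mat, ranks=[1, 2], FARs=[1.0])
print(DIRs)  # ~[0.33, 0.33]: only the first probe's match is retrieved within the top 2
```

In the actual evaluation, `score_mat` is the probe-by-gallery inner-product matrix built in `TinyFaceTest.test_identification`, the gallery includes the distractor images (labelled -100 so they never match a probe), and `ranks=[1,5,20]` gives the numbers written to `result.csv`.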