-
Notifications
You must be signed in to change notification settings - Fork 89
/
generate_xray_nearest.py
136 lines (109 loc) · 4.93 KB
/
generate_xray_nearest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
'''
# author: Zhiyuan Yan
# email: zhiyuanyan@link.cuhk.edu.cn
# date: 2023-03-30
The code is specifically designed for generating nearest sample pairs for Face X-ray.
Alternatively, you can utilize the pre-generated pkl files available in our GitHub repository. Please refer to the "Releases" section on our repository for accessing these files.
'''
import os
import json
import pickle
import numpy as np
import heapq
import random
from tqdm import tqdm
from scipy.spatial import KDTree
def load_landmark(file_path):
    """
    Load 2D facial landmarks from a file path.

    Args:
        file_path: A string indicating the path to the ``.npy`` landmark
            file, or None.

    Returns:
        A float32 numpy array containing the loaded landmarks. If
        ``file_path`` is None or does not exist on disk, an all-zero
        float32 array of shape (81, 2) is returned as a placeholder.
    """
    if file_path is not None and os.path.exists(file_path):
        return np.float32(np.load(file_path))
    # Use float32 for the placeholder too, so every value returned by this
    # function has the same dtype regardless of whether the file was found.
    return np.zeros((81, 2), dtype=np.float32)
def get_landmark_dict(dataset_folder):
    """
    Build (or load from cache) a dict mapping landmark paths to landmark arrays.

    Args:
        dataset_folder: Folder that contains the "FaceForensics++.json"
            metadata file.

    Returns:
        A dict whose keys are landmark file paths (each frame path with
        'frames' replaced by 'landmarks' and ".png" by ".npy") and whose
        values are whatever ``load_landmark`` returns for that path.

    Side effects:
        Reads a pickle cache from the current working directory if one
        exists; otherwise writes the result to 'landmark_dict_ffall.pkl'.
    """
    # The cache used to be looked up as 'landmark_dict_ff.pkl' but written as
    # 'landmark_dict_ffall.pkl', so a freshly written cache was never reused.
    # Accept both names (legacy name first) and keep writing the same file.
    # NOTE: pickle is only safe here because the cache is a locally produced
    # file; do not point this at files from untrusted sources.
    for cache_path in ('landmark_dict_ff.pkl', 'landmark_dict_ffall.pkl'):
        if os.path.exists(cache_path):
            with open(cache_path, 'rb') as f:
                return pickle.load(f)

    # Open the metadata file for the current folder
    metadata_path = os.path.join(dataset_folder, "FaceForensics++.json")
    with open(metadata_path, "r") as f:
        metadata = json.load(f)

    ff_real_data = metadata['FaceForensics++']['FF-real']

    # Walk every frame listed under the 'c23' entry of each split and load
    # the landmark file that corresponds to it.
    landmark_dict = {}
    for value in ff_real_data.values():
        for video_info in tqdm(value['c23'].values()):
            for frame_path in video_info['frames']:
                landmark_path = frame_path.replace('frames', 'landmarks').replace(".png", ".npy")
                landmark_dict[landmark_path] = load_landmark(landmark_path)

    # Save the dictionary to a pickle file
    with open('landmark_dict_ffall.pkl', 'wb') as f:
        pickle.dump(landmark_dict, f)
    return landmark_dict
def get_nearest_faces_fixed_pair(landmark_info, num_neighbors):
    '''
    Using KDTree to pick ONE fixed partner face for each image.

    For every face, the ``num_neighbors`` nearest other faces (Euclidean
    distance between flattened landmarks) are found and one of them is
    chosen at random with a fixed seed.

    Args:
        landmark_info: dict mapping a landmark id to a numpy landmark array.
        num_neighbors: size of the neighbour pool to pick from.

    Returns:
        A dict mapping each landmark id to the id of its picked neighbour.
        Faces that have no other face to pair with are omitted. An empty
        ``landmark_info`` yields an empty dict (nothing is cached then).

    NOTE: if 'nearest_face_info.pkl' exists in the working directory its
    content is returned as-is, without checking it against the arguments.
    ``get_nearest_faces`` uses the same file name but stores a list of ids
    per face, so do not mix the two caches in one directory.
    '''
    random.seed(1024)  # Fix the random seed for reproducibility

    # Check if the dictionary has already been created
    if os.path.exists('nearest_face_info.pkl'):
        with open('nearest_face_info.pkl', 'rb') as f:
            return pickle.load(f)

    # KDTree cannot be built from an empty point set.
    if not landmark_info:
        return {}

    landmark_ids = list(landmark_info.keys())
    landmarks_array = np.array([lmk.flatten() for lmk in landmark_info.values()])
    num_faces = len(landmark_ids)

    # Asking for more neighbours than there are points makes KDTree.query
    # pad the result with an out-of-range index, so clamp k to the data size.
    k = min(num_neighbors + 1, num_faces)

    # Build a KDTree using the flattened landmarks
    tree = KDTree(landmarks_array)

    nearest_faces = {}
    for idx, this_lmk in tqdm(enumerate(landmarks_array), total=num_faces):
        _, indices = tree.query(this_lmk, k=k)
        # Drop the query face by index rather than by position: with
        # duplicated landmarks the face itself is not guaranteed to come first.
        # atleast_1d covers k == 1, where query returns a scalar.
        candidates = [i for i in np.atleast_1d(indices) if i != idx][:num_neighbors]
        if not candidates:
            continue
        # Randomly pick one from the nearest N neighbors (excluding itself)
        picked_idx = random.choice(candidates)
        nearest_faces[landmark_ids[idx]] = landmark_ids[picked_idx]

    # Save the dictionary to a pickle file
    with open('nearest_face_info.pkl', 'wb') as f:
        pickle.dump(nearest_faces, f)
    return nearest_faces
def get_nearest_faces(landmark_info, num_neighbors):
    '''
    Using KDTree to find the nearest faces for each image (Much faster!!)

    Args:
        landmark_info: dict mapping a landmark id to a numpy landmark array.
        num_neighbors: maximum number of neighbours to keep per face.

    Returns:
        A dict mapping each landmark id to a list of the ids of its nearest
        other faces (Euclidean distance between flattened landmarks), at
        most ``num_neighbors`` long and never containing the face itself.
        An empty ``landmark_info`` yields an empty dict (nothing is cached
        then).

    NOTE: if 'nearest_face_info.pkl' exists in the working directory its
    content is returned as-is, without checking it against the arguments.
    ``get_nearest_faces_fixed_pair`` uses the same file name but stores a
    single id per face, so do not mix the two caches in one directory.
    '''
    random.seed(1024)  # Fix the random seed for reproducibility

    # Check if the dictionary has already been created
    if os.path.exists('nearest_face_info.pkl'):
        with open('nearest_face_info.pkl', 'rb') as f:
            return pickle.load(f)

    # KDTree cannot be built from an empty point set.
    if not landmark_info:
        return {}

    landmark_ids = list(landmark_info.keys())
    landmarks_array = np.array([lmk.flatten() for lmk in landmark_info.values()])
    num_faces = len(landmark_ids)

    # Asking for more neighbours than there are points makes KDTree.query
    # pad the result with an out-of-range index, so clamp k to the data size.
    k = min(num_neighbors + 1, num_faces)

    # Build a KDTree using the flattened landmarks
    tree = KDTree(landmarks_array)

    nearest_faces = {}
    for idx, this_lmk in tqdm(enumerate(landmarks_array), total=num_faces):
        _, indices = tree.query(this_lmk, k=k)
        # Drop the query face by index rather than by position: with
        # duplicated landmarks the face itself is not guaranteed to come first.
        # atleast_1d covers k == 1, where query returns a scalar.
        neighbor_indices = [i for i in np.atleast_1d(indices) if i != idx][:num_neighbors]
        # Store the nearest N neighbors (excluding itself)
        nearest_faces[landmark_ids[idx]] = [landmark_ids[i] for i in neighbor_indices]

    # Save the dictionary to a pickle file
    with open('nearest_face_info.pkl', 'wb') as f:
        pickle.dump(nearest_faces, f)
    return nearest_faces
# Folder holding the dataset JSON metadata ("FaceForensics++.json" is read from it)
dataset_folder = "/home/zhiyuanyan/disfin/deepfake_benchmark/preprocessing/dataset_json/"
# Build (or load from cache) the landmark-path -> landmark-array dictionary
landmark_info = get_landmark_dict(dataset_folder=dataset_folder)
# Look up the nearest faces of every entry in landmark_info.
# The original author reports a running time of about 20 mins for this step.
num_neighbors = 100
nearest_faces_info = get_nearest_faces(landmark_info=landmark_info, num_neighbors=num_neighbors)