clip_explicit_prior_encoder_PALM.py
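
"""
Extract explicit text-guided prior maps for the PALM fundus dataset with CLIP (ViT-B/16).

For each image, the script resizes and zero-pads the image, encodes it and a
lesion text prompt with CLIP, computes per-patch image-text similarity,
min-max normalizes the resulting 14x14 similarity map, upsamples it to 64x64,
and saves it as a .npy file under the corresponding *_prior directory.
"""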
import torch
import clip
from PIL import Image, ImageOps
from torch import nn
from contextlib import contextmanager
import numpy as np
import os
import pandas as pd


# Module-level helper; PriorExtractor defines the same normalization as a method below.
def min_max_normalize(tensor, min_val=0.0, max_val=1.0):
    # Get the minimum and maximum values from the tensor
    min_tensor = tensor.min()
    max_tensor = tensor.max()
    # Perform min-max normalization (the epsilon avoids division by zero for constant tensors)
    norm_tensor = (tensor - min_tensor) / (max_tensor - min_tensor + 1e-10)
    # Scale to the desired range [min_val, max_val]
    norm_tensor = norm_tensor * (max_val - min_val) + min_val
    return norm_tensor


def resize_and_pad_image(input_image, target_size=(1024, 1024)):
    original_size = input_image.size
    # Rescale so the image fits inside target_size while preserving the aspect ratio
    ratio = min(target_size[0] / original_size[0], target_size[1] / original_size[1])
    new_size = (int(original_size[0] * ratio), int(original_size[1] * ratio))
    resize_image = input_image.resize(new_size, Image.BICUBIC)
    # Number of pixels needed to pad up to target_size
    delta_w = target_size[0] - new_size[0]
    delta_h = target_size[1] - new_size[1]
    padding = (delta_w // 2, delta_h // 2, delta_w - (delta_w // 2), delta_h - (delta_h // 2))
    # Pad symmetrically with black
    padded_image = ImageOps.expand(resize_image, padding, fill=(0, 0, 0))
    return padded_image
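

# Worked example (hypothetical input size): an 800x600 image gives
# ratio = min(1024/800, 1024/600) = 1.28, new_size = (1024, 768),
# padding = (0, 128, 0, 128), so the output is exactly 1024x1024.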


class PriorExtractor:
    """Extracts a text-guided explicit prior map from an image using CLIP."""

    def __init__(self, model_name='ViT-B/16', device=None):
        # Resolve the device explicitly; clip.load's behaviour with device=None is not well defined
        self.device = device if device is not None else ('cuda' if torch.cuda.is_available() else 'cpu')
        self.model, self.preprocess = clip.load(model_name, device=self.device)
        self.feature_map = None

    @contextmanager
    def register_transformer_hook(self):
        # Temporarily capture the output of the visual transformer (the full token sequence)
        def hook_fn(module, input, output):
            self.feature_map = output

        hook = self.model.visual.transformer.register_forward_hook(hook_fn)
        try:
            yield
        finally:
            hook.remove()

    def min_max_normalize(self, tensor, min_val=0.0, max_val=1.0):
        # Get the minimum and maximum values from the tensor
        min_tensor = tensor.min()
        max_tensor = tensor.max()
        # Perform min-max normalization
        norm_tensor = (tensor - min_tensor) / (max_tensor - min_tensor + 1e-10)
        # Scale to the desired range [min_val, max_val]
        norm_tensor = norm_tensor * (max_val - min_val) + min_val
        return norm_tensor

    def get_visual_features(self, image):
        with self.register_transformer_hook():
            with torch.no_grad():
                _ = self.model.encode_image(image.type(self.model.visual.conv1.weight.dtype))
        if self.feature_map is None:
            raise ValueError("Hook did not capture the transformer output.")
        # Hook output is (seq_len, batch, width); move the batch dimension first
        permuted_feature_map = self.feature_map.permute(1, 0, 2)
        # Drop the class token, then apply CLIP's final LayerNorm and projection
        after_ln_post = self.model.visual.ln_post(permuted_feature_map[:, 1:, :])
        after_proj = after_ln_post @ self.model.visual.proj  # (batch, 196, 512) for ViT-B/16 at 224x224
        # print('after_proj shape:', after_proj.shape)
        return after_proj

    def compute_similarity(self, P_v, P_t):
        # P_v: (1, num_patches, dim) patch embeddings; P_t: (1, dim) text embedding
        # Result: (1, num_patches, 1) patch-to-text similarity scores
        P_s = torch.matmul(P_v, P_t.transpose(-1, -2).unsqueeze(0))
        return P_s

    def extract_prior(self, image_path, text_prompt):
        input_image = Image.open(image_path).convert("RGB")
        resize_image = resize_and_pad_image(input_image)
        image = self.preprocess(resize_image).unsqueeze(0).to(self.device)
        # image = self.preprocess(Image.open(image_path)).unsqueeze(0).to(self.device)
        text = clip.tokenize([text_prompt]).to(self.device)
        with torch.no_grad():
            image_features = self.get_visual_features(image)
            text_features = self.model.encode_text(text)
        # Normalize both modalities so their product is a cosine similarity
        P_v = image_features / image_features.norm(dim=-1, keepdim=True)
        P_t = text_features / text_features.norm(dim=-1, keepdim=True)
        P_s = self.compute_similarity(P_v, P_t)
        # Reshape the 196 patch scores into a 14x14 map, normalize, and upsample to 64x64
        P_s_prime = P_s.reshape(1, 14, 14)
        P_e = self.min_max_normalize(P_s_prime)
        P_e_resized = nn.functional.interpolate(P_e.unsqueeze(1), size=(64, 64), mode='bilinear').squeeze(1)
        # print('P_e_resized shape:', P_e_resized.shape)
        return P_e_resized
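

# Minimal usage sketch (the image path below is a placeholder, not a file in this repository):
#   extractor = PriorExtractor(device='cuda:0')
#   prior = extractor.extract_prior('/path/to/fundus.jpg',
#                                   'Pale and white appearance scattered whole area or circular oval around optic disc')
#   prior.shape  # torch.Size([1, 64, 64])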


def get_explict_prior_npz(extractor, set_type, image_name, mask_type):
    assert set_type in ('1. Training Set', '2. Validation Set', '3. Testing Set')
    assert mask_type in ('Atrophy', '1. Lacquer Cracks', '2. Choroidal Neovascularization', '3. Fuchs Spot')
    image_dir = '/data/home/litingyao/project/SAM/data/PALM_MY'
    save_dir = '/data/home/litingyao/project/SAM/data/PALM_NPY'
    if set_type == '1. Training Set':
        img_set = 'Train_images'
        mask_set = 'Train_atrophy_masks'
        prior_set = 'Train_prior'
    elif set_type == '2. Validation Set':
        img_set = 'Valid_images'
        mask_set = 'Valid_atrophy_masks'
        prior_set = 'Valid_prior'
    elif set_type == '3. Testing Set':
        img_set = 'Test_images'
        mask_set = 'Test_atrophy_masks'
        prior_set = 'Test_prior'
    image_set_dir = os.path.join(image_dir, img_set)
    gt_dir = os.path.join(image_dir, mask_set)
    image_path = os.path.join(image_set_dir, image_name)
    # Lesion-specific text prompt used to guide the prior
    if mask_type == '3. Fuchs Spot':
        text_description = 'Pigmented grayish white scar'
    elif mask_type == '2. Choroidal Neovascularization':
        text_description = ''  # no text prompt defined for Choroidal Neovascularization
    elif mask_type == '1. Lacquer Cracks':
        text_description = 'Yellowish thick linear lesions in the macula'
    elif mask_type == 'Atrophy':
        text_description = 'Pale and white appearance scattered whole area or circular oval around optic disc'
    ex_prior_torch = extractor.extract_prior(image_path, text_description)
    ex_prior_numpy = ex_prior_torch.cpu().numpy()
    save_set_dir = os.path.join(save_dir, prior_set)
    os.makedirs(save_set_dir, exist_ok=True)
    save_npy_path = os.path.join(save_set_dir, image_name.split('.')[0] + '.npy')
    np.save(save_npy_path, ex_prior_numpy)


if __name__ == '__main__':
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    extractor = PriorExtractor(device=device)
    for set_type in ['1. Training Set', '2. Validation Set', '3. Testing Set']:
        image_dir = '/data/home/litingyao/project/SAM/data/PALM_MY'
        if set_type == '1. Training Set':
            img_set = 'Train_images'
            mask_set = 'Train_atrophy_masks'
            prior_set = 'Train_prior'
        elif set_type == '2. Validation Set':
            img_set = 'Valid_images'
            mask_set = 'Valid_atrophy_masks'
            prior_set = 'Valid_prior'
        elif set_type == '3. Testing Set':
            img_set = 'Test_images'
            mask_set = 'Test_atrophy_masks'
            prior_set = 'Test_prior'
        image_set_dir = os.path.join(image_dir, img_set)
        for mask_type in ['Atrophy']:
            image_list = sorted(os.listdir(image_set_dir))
            print(len(image_list))
            for image_name in image_list:
                get_explict_prior_npz(extractor, set_type, image_name, mask_type)
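
# Each saved prior can be loaded back as a (1, 64, 64) numpy array, e.g.
# (sketch with a placeholder file name):
#   prior = np.load('/data/home/litingyao/project/SAM/data/PALM_NPY/Train_prior/some_image.npy')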