# my_utils.py
from __future__ import print_function, division
import pandas as pd
from skimage import io, transform
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, datasets, models
import torch.nn as nn
from skorch.callbacks import Callback
import random
from tqdm import tqdm
import os
from config import data_transforms
from matplotlib import pyplot as plt
from sklearn.manifold import TSNE
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")
'''
Customized Dataset.
dataset.csv has a 'path' column (image path), a 'label' column, and a 'type'
column ('train' for training images, 'test' for testing images).
Input:
    img_path: image paths, extracted from dataset.csv using df['path']
    labels: labels, extracted from dataset.csv using df['label']
    transform: torch transform, None by default
'''
class Alzheimer_Dataset(Dataset):
    def __init__(self, img_path, labels, transform=None):
        self.transform = transform
        self.imgs = img_path
        self.labels = labels
        self.length = len(self.imgs)

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        img_path = self.imgs[index]
        image = io.imread(img_path)
        label = self.labels[index]
        if self.transform is not None:
            image = self.transform(image)
        return image, label
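'''
Example usage (a minimal sketch, not part of the original pipeline; it assumes
dataset.csv exists with the columns described above and that config.py exposes
a data_transforms['train'] entry -- only 'val' is actually used in this file):

    df = pd.read_csv("dataset.csv")
    df['num_label'], _ = pd.factorize(df['label'])
    train_df = df[df['type'] == "train"]
    train_set = Alzheimer_Dataset(train_df['path'].values,
                                  train_df['num_label'].values,
                                  transform=data_transforms['train'])
    train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
    images, labels = next(iter(train_loader))
'''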
'''
Initialize pretrained models, adapted for single-channel (grayscale) input.
Adapted from https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html
'''
def get_pretrained_models(model_name):
    NUM_OUTPUT = 4

    class Pretrained_Model(nn.Module):
        def __init__(self, output_features, model_name):
            super().__init__()
            if model_name == "res18":
                model = models.resnet18(pretrained=True)
            elif model_name == "res34":
                model = models.resnet34(pretrained=True)
            elif model_name == "res50":
                model = models.resnet50(pretrained=True)
            elif model_name == "res101":
                model = models.resnet101(pretrained=True)
            elif model_name == "res152":
                model = models.resnet152(pretrained=True)
            elif model_name == "squeeze":
                model = models.squeezenet1_0(pretrained=True)
            elif model_name == "vgg":
                model = models.vgg11_bn(pretrained=True)
            elif model_name == "alexnet":
                model = models.alexnet(pretrained=True)
            elif model_name == "densenet":
                model = models.densenet121(pretrained=True)
            else:
                print("wrong model name, quitting")
                exit()
            # adjusting model architectures
            if model_name.startswith("res") or model_name == "densenet":
                # pretrained models take RGB images; ours are grayscale,
                # thus 1, 64 instead of 3, 64 in the first conv layer
                if model_name == "densenet":
                    model.features[0] = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
                    num_ftrs = model.classifier.in_features
                    model.classifier = nn.Linear(num_ftrs, output_features)
                else:  # resnets
                    model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
                    num_ftrs = model.fc.in_features
                    model.fc = nn.Linear(num_ftrs, output_features)
            elif model_name == "squeeze":
                model.features[0] = nn.Conv2d(1, 96, kernel_size=7, stride=2)
                model.classifier[1] = nn.Conv2d(512, output_features, kernel_size=(1, 1), stride=(1, 1))
                model.num_classes = output_features
            elif model_name == "vgg" or model_name == "alexnet":
                if model_name == "alexnet":
                    model.features[0] = nn.Conv2d(1, 64, kernel_size=11, stride=4, padding=2)
                else:
                    model.features[0] = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)
                num_ftrs = model.classifier[6].in_features
                model.classifier[6] = nn.Linear(num_ftrs, output_features)
            self.model = model

        def forward(self, x):
            return self.model(x)
    # 224 is the input resolution the pretrained torchvision models expect
    input_size = 224
    return Pretrained_Model(NUM_OUTPUT, model_name), input_size
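'''
Example usage (a sketch; downloads ImageNet weights on the first call):

    model, input_size = get_pretrained_models("res18")
    x = torch.randn(2, 1, input_size, input_size)   # batch of 2 grayscale images
    logits = model(x)                                # shape: (2, 4)
'''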
'''
Remove the final classification layer of the model so that it outputs
feature vectors instead of class scores.
'''
def remove_last_layer(model, model_name):
    children = list(model.model.children())
    if model_name == "vgg" or model_name == "alexnet":
        # keep everything up to the classifier, plus the classifier without its
        # final Linear layer; nn.Flatten() bridges the pooled conv output and
        # the fully connected layers
        return torch.nn.Sequential(*children[:-1], nn.Flatten(), children[-1][:-1])
    # resnets, densenet and squeezenet: drop the last child
    # (the fc layer / classifier block)
    return torch.nn.Sequential(*children[:-1])


class Identity(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x
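'''
Example: turning a classifier into a feature extractor (a sketch; the output
size below is for "res18", whose penultimate features are 512-dimensional):

    model, input_size = get_pretrained_models("res18")
    feature_extractor = remove_last_layer(model, "res18")
    x = torch.randn(2, 1, input_size, input_size)
    feats = feature_extractor(x)                 # shape: (2, 512, 1, 1)
    feats = feats.flatten(start_dim=1)           # shape: (2, 512)

Identity offers an alternative route: assigning model.model.fc = Identity()
keeps the original forward pass but returns the 512-d features directly.
'''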
'''
Forward the test images through the selected model (with its final layer
removed) and save the resulting feature vectors to
feature_vectors/<model_name>_feature_vec.npy
'''
def get_feature_vector(model_name):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    csv_path = "./dataset.csv"
    test_df = pd.read_csv(csv_path)
    test_df['num_label'], _ = pd.factorize(test_df['label'])  # turn labels (strings) into numbers
    test_df = test_df[test_df['type'] == "test"]
    img_path = test_df['path'].values
    labels = test_df['num_label'].values
    model, input_size = get_pretrained_models(model_name)
    model = remove_last_layer(model, model_name)
    model.eval()
    dataset_test = Alzheimer_Dataset(img_path, labels, transform=data_transforms['val'])
    dataset_loader = torch.utils.data.DataLoader(dataset_test,
                                                 batch_size=128,
                                                 shuffle=False,
                                                 num_workers=8)
    model.to(device)
    feature_vec_nps = []
    with tqdm(total=100) as pbar:
        for index, batch in enumerate(dataset_loader):
            # print("processing: %d/%d" % (index, len(dataset_loader)))
            with torch.set_grad_enabled(False):
                inputs, labels = batch
                inputs = inputs.to(device)
                outputs = model.forward(inputs)
                for output in outputs:
                    feature_vec_nps.append(output.flatten().cpu().numpy())
            pbar.update(100 / len(dataset_loader))
    # make sure the output directory exists before saving
    os.makedirs("./feature_vectors", exist_ok=True)
    save_at = os.path.join("./feature_vectors", model_name + "_feature_vec.npy")
    print("saving feature vectors at", save_at)
    with open(save_at, "wb") as f:
        np.save(f, feature_vec_nps)
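'''
Example usage (a sketch; expects ./dataset.csv to exist and writes
./feature_vectors/res18_feature_vec.npy):

    get_feature_vector("res18")
    features = np.load("./feature_vectors/res18_feature_vec.npy")
    print(features.shape)   # (num_test_images, feature_dim)
'''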
'''
Make a t-SNE visualization for the selected model.
The feature vectors are obtained by forwarding the test images through the
selected model after removing its final fully connected layer.
Args:
    model_name: name of the model
    sample_imgs: number of test images to sample;
        default = -1 means no sampling (use all test images);
        should be <= total number of test images
    feature_available: bool
        set to True if <model_name>_feature_vec.npy is already available in the
        feature_vectors directory, False otherwise
'''
def tsne_visualization(model_name, sample_imgs=-1, feature_available=True):
    if not feature_available:
        get_feature_vector(model_name)
    # load the feature vectors
    feature_path = os.path.join("feature_vectors/", model_name + "_feature_vec.npy")
    with open(feature_path, "rb") as f:
        features = np.load(f)
    # load labels
    df = pd.read_csv("dataset.csv")
    df['label'], label_names = pd.factorize(df['label'])
    test_df = df[df['type'] == "test"]
    # reset the index so it lines up with the row order of the saved feature vectors
    test_df = test_df.reset_index(drop=True)
    if sample_imgs != -1:
        test_df = test_df.sample(n=sample_imgs, random_state=42)
    img_path = test_df['path']
    labels = test_df['label'].values
    indices = test_df.index
    # t-SNE dimensionality reduction (2-d)
    tsne = TSNE(n_components=2, random_state=42)
    X_2d = tsne.fit_transform(features[indices])
    target_ids = range(len(set(labels)))  # 4 classes
    # make plot
    plt.figure(figsize=(10, 8))
    colors = 'darkred', 'salmon', 'lightpink', 'lightblue'
    for i, c, label in zip(target_ids, colors, label_names):
        plt.scatter(X_2d[labels == i, 0], X_2d[labels == i, 1], c=c, label=label)
    plt.title("t-SNE using feature vectors from: " + model_name, fontsize=15)
    plt.xticks([])
    plt.yticks([])
    plt.legend()
    plt.show()
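'''
Example usage (a sketch):

    # compute the feature vectors first, then plot all test images
    tsne_visualization("res18", feature_available=False)
    # reuse saved features and plot a random sample of 500 test images
    tsne_visualization("res18", sample_imgs=500, feature_available=True)
'''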
'''
Set random seeds to reproduce the results.
Optionally takes a seed as argument (default: 42).
'''
class FixRandomSeed(Callback):
    def __init__(self, seed=42):
        self.seed = seed

    def initialize(self):
        # print("setting random seed to: ", self.seed)
        torch.manual_seed(self.seed)
        torch.cuda.manual_seed(self.seed)
        random.seed(self.seed)
        np.random.seed(self.seed)
        torch.backends.cudnn.deterministic = True
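'''
Example usage with a skorch estimator (a sketch; NeuralNetClassifier and its
hyperparameters below are illustrative, not defined in this file):

    from skorch import NeuralNetClassifier

    model, _ = get_pretrained_models("res18")
    net = NeuralNetClassifier(
        model,
        max_epochs=10,
        lr=0.001,
        callbacks=[FixRandomSeed(seed=42)],
    )
'''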