-
Notifications
You must be signed in to change notification settings - Fork 0
/
feat_extract_largescale.py
141 lines (121 loc) · 6.52 KB
/
feat_extract_largescale.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#! /usr/bin/env python3
"""Extract penultimate-layer features, classifier scores, and labels for
in-distribution and OOD datasets, caching them as disk-backed memmaps."""
import torch
import os
import pdb
from util.args_loader import get_args
from util.data_loader import get_loader_in, get_loader_out
from util.model_loader import get_model
import numpy as np
import torch.nn.functional as F

# Fix all seeds so extracted features are reproducible across runs.
torch.manual_seed(1)
torch.cuda.manual_seed(1)
np.random.seed(1)

args = get_args()

# BUG FIX: CUDA_VISIBLE_DEVICES must be exported BEFORE any CUDA query —
# torch.cuda.is_available() initializes the driver context, after which the
# variable is ignored.  The original set it afterwards AND hard-coded
# 'cuda:3'; with the visible-device list restricted to args.gpu, device 0
# of the visible set ('cuda') is the correct target.
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
device = 'cuda' if torch.cuda.is_available() else 'cpu'

loader_in_dict = get_loader_in(args, config_type="eval", split=('train', 'val'))
trainloaderIn, testloaderIn, num_classes = loader_in_dict.train_loader, loader_in_dict.val_loader, loader_in_dict.num_classes

model = get_model(args, num_classes, load_ckpt=True)  # set true to load from ash_ckpt in their repo; essentially the same as torch ckpt
model.to(device)

batch_size = args.batch_size
# Penultimate-layer feature width per supported architecture; KeyError on
# an unsupported --model_arch is intentional (fail fast).
featdim = {
    'resnet50': 2048,
    'resnet50-supcon': 2048,
}[args.model_arch]

FORCE_RUN = True   # overwrite caches even if they already exist
ID_RUN = True      # extract in-distribution features
OOD_RUN = True     # extract out-of-distribution features
if ID_RUN:
    # Extract features/scores/labels for the in-distribution val and train
    # splits.  Results go to disk-backed memmaps so large-scale datasets
    # never need to fit in RAM.
    for split, in_loader in [('val', testloaderIn), ('train', trainloaderIn)]:
        cache_dir = f"cache/{args.in_dataset}_{split}_{args.name}_in"
        if FORCE_RUN or not os.path.exists(cache_dir):
            os.makedirs(cache_dir, exist_ok=True)
            n_samples = len(in_loader.dataset)  # hoisted loop invariant
            feat_log = np.memmap(f"{cache_dir}/feat.mmap", dtype=float, mode='w+', shape=(n_samples, featdim))
            score_log = np.memmap(f"{cache_dir}/score.mmap", dtype=float, mode='w+', shape=(n_samples, num_classes))
            label_log = np.memmap(f"{cache_dir}/label.mmap", dtype=float, mode='w+', shape=(n_samples,))
            model.eval()
            with torch.no_grad():
                for batch_idx, (inputs, targets) in enumerate(in_loader):
                    inputs, targets = inputs.to(device), targets.to(device)
                    # Rows are filled in iteration order; labels are stored
                    # alongside features, so loader shuffling is harmless.
                    start_ind = batch_idx * batch_size
                    end_ind = min(start_ind + batch_size, n_samples)
                    # SupCon checkpoints expose the backbone as `encoder`;
                    # the plain ResNet exposes it as `features`.
                    if args.model_arch == 'resnet50-supcon':
                        out = model.encoder(inputs)
                    else:
                        out = model.features(inputs)
                    # Collapse any remaining spatial dims to a flat vector.
                    if len(out.shape) > 2:
                        out = F.adaptive_avg_pool2d(out, 1)
                    out = out.view(out.size(0), -1)
                    score = model.fc(out)
                    # score = net(inputs)
                    feat_log[start_ind:end_ind, :] = out.data.cpu().numpy()
                    label_log[start_ind:end_ind] = targets.data.cpu().numpy()
                    score_log[start_ind:end_ind] = score.data.cpu().numpy()
                    if batch_idx % 100 == 0:
                        print(f"{batch_idx}/{len(in_loader)}")
            # Explicitly sync to disk — np.memmap otherwise only guarantees
            # flushing when the array object is destroyed.
            feat_log.flush()
            score_log.flush()
            label_log.flush()
        else:
            # Cache hit: reopen the existing memmaps read-only.
            feat_log = np.memmap(f"{cache_dir}/feat.mmap", dtype=float, mode='r', shape=(len(in_loader.dataset), featdim))
            score_log = np.memmap(f"{cache_dir}/score.mmap", dtype=float, mode='r', shape=(len(in_loader.dataset), num_classes))
            label_log = np.memmap(f"{cache_dir}/label.mmap", dtype=float, mode='r', shape=(len(in_loader.dataset),))
if OOD_RUN:
    # Extract features/scores for every configured OOD dataset (no labels —
    # OOD samples have none that matter for scoring).
    for ood_dataset in args.out_datasets:
        # NOTE(review): ('val') is a plain string, not a 1-tuple — kept as-is
        # because get_loader_out may rely on receiving a string; confirm
        # against util.data_loader before "fixing" it to ('val',).
        loader_test_dict = get_loader_out(args, dataset=(None, ood_dataset), split=('val'))
        out_loader = loader_test_dict.val_ood_loader
        cache_dir = f"cache/{ood_dataset}vs{args.in_dataset}_{args.name}_out"
        if FORCE_RUN or not os.path.exists(cache_dir):
            os.makedirs(cache_dir, exist_ok=True)
            n_samples = len(out_loader.dataset)  # hoisted loop invariant
            ood_feat_log = np.memmap(f"{cache_dir}/feat.mmap", dtype=float, mode='w+', shape=(n_samples, featdim))
            ood_score_log = np.memmap(f"{cache_dir}/score.mmap", dtype=float, mode='w+', shape=(n_samples, num_classes))
            model.eval()
            with torch.no_grad():
                for batch_idx, (inputs, _) in enumerate(out_loader):
                    inputs = inputs.to(device)
                    start_ind = batch_idx * batch_size
                    end_ind = min(start_ind + batch_size, n_samples)
                    # Same backbone dispatch as the ID loop above.
                    if args.model_arch == 'resnet50-supcon':
                        out = model.encoder(inputs)
                    else:
                        out = model.features(inputs)
                    if len(out.shape) > 2:
                        out = F.adaptive_avg_pool2d(out, 1)
                    out = out.view(out.size(0), -1)
                    score = model.fc(out)
                    ood_feat_log[start_ind:end_ind, :] = out.data.cpu().numpy()
                    ood_score_log[start_ind:end_ind] = score.data.cpu().numpy()
                    if batch_idx % 100 == 0:
                        print(f"{batch_idx}/{len(out_loader)}")
            # Explicitly sync to disk — np.memmap otherwise only guarantees
            # flushing when the array object is destroyed.
            ood_feat_log.flush()
            ood_score_log.flush()
        else:
            # Cache hit: reopen the existing memmaps read-only.
            ood_feat_log = np.memmap(f"{cache_dir}/feat.mmap", dtype=float, mode='r', shape=(len(out_loader.dataset), featdim))
            ood_score_log = np.memmap(f"{cache_dir}/score.mmap", dtype=float, mode='r', shape=(len(out_loader.dataset), num_classes))
# loader_test_dict = get_loader_out(args, dataset=(None, 'noise'), split=('val'))
# out_loader = loader_test_dict.val_ood_loader
#
# cache_dir = f"cache/{'noise'}vs{args.in_dataset}_{args.name}_out"
# if FORCE_RUN or not os.path.exists(cache_dir):
# os.makedirs(cache_dir, exist_ok=True)
# ood_feat_log = np.memmap(f"{cache_dir}/feat.mmap", dtype=float, mode='w+', shape=(len(out_loader.dataset), featdim))
# ood_score_log = np.memmap(f"{cache_dir}/score.mmap", dtype=float, mode='w+', shape=(len(out_loader.dataset), num_classes))
# model.eval()
# for batch_idx, (inputs, _) in enumerate(out_loader):
# inputs = inputs.to(device).float()
# start_ind = batch_idx * batch_size
# end_ind = min((batch_idx + 1) * batch_size, len(out_loader.dataset))
#
# if args.model_arch == 'resnet50-supcon':
# out = model.encoder(inputs)
# else:
# out = model.features(inputs)
# if len(out.shape) > 2:
# out = F.adaptive_avg_pool2d(out, 1)
# out = out.view(out.size(0), -1)
# score = model.fc(out)
# # score = net(inputs)
# ood_feat_log[start_ind:end_ind, :] = out.data.cpu().numpy()
# ood_score_log[start_ind:end_ind] = score.data.cpu().numpy()
# if batch_idx % 100 == 0:
# print(f"{batch_idx}/{len(out_loader)}")
#
# else:
# ood_feat_log = np.memmap(f"{cache_dir}/feat.mmap", dtype=float, mode='r', shape=(len(out_loader.dataset), featdim))
# ood_score_log = np.memmap(f"{cache_dir}/score.mmap", dtype=float, mode='r', shape=(len(out_loader.dataset), num_classes))