-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathresize_organize_flexible.py
126 lines (106 loc) · 4.51 KB
/
resize_organize_flexible.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import glob
import pandas as pd
from pathlib import Path
from PIL import Image, ImageOps
import traceback
from numpy import asarray
from albumentations import CenterCrop
from tqdm.contrib.concurrent import process_map
import numpy as np
import shutil
# ---------------------------------------------------------------------------
# Configuration: input/output locations and processing parameters
# ---------------------------------------------------------------------------
# CSV with one row per image: the first column is used as the index and is
# looked up by image file name; `target_column_name` holds the class label.
filenames_and_labels = r"C:\Users\Rodney\PycharmProjects\Thesis_cur-AI-tor\notebooks\portrait_cc_14_r23n23.lisa.surfsara.nl_48373_(960, 40, 'euclidean', 1, 100).csv"
target_column_name = 'class'
image_input_folder = r'C:\Users\Rodney\Desktop\saatchi\saatchi'
# image_input_folder = r'E:\temp\thesisdata\micro_dataset1'
image_output_folder = r'C:\Users\Rodney\Desktop\saatchi\umap_hdsbscan_test_portrait_(960, 40, euclidean, 1, 100)_res'
# NOTE(review): this flag is not read anywhere in the visible code.
resize_and_crop_ = False
# Edge length (pixels) of the square output images.
size_ = 128
# Upper bound on the number of images written per class label.
image_count_per_class = 1000000
cropper = CenterCrop(height=size_, width=size_)

# Load target data
targets_df = pd.read_csv(filenames_and_labels, index_col=0)

# Keep only the label column. Selecting it directly avoids dropping the other
# columns one by one while iterating over them.
targets_df = targets_df[[target_column_name]]

# Remove duplicates: keep the first row for every index value. Working on the
# index itself is independent of how the CSV names its first column; the
# resulting index is labelled 'index'.
targets_df = targets_df[~targets_df.index.duplicated(keep='first')].rename_axis('index')
def _pad_to_square(img, desired_size: int):
    """Scale ``img`` so its longer side is ``desired_size`` and centre it on an RGB square."""
    scale = float(desired_size) / max(img.size)
    # Clamp to 1 px so extreme aspect ratios never produce a zero-sized resize.
    new_size = tuple(max(1, int(side * scale)) for side in img.size)
    # Image.LANCZOS is the filter formerly also exposed as Image.ANTIALIAS,
    # an alias that no longer exists in current Pillow releases.
    resized = img.resize(new_size, Image.LANCZOS)
    # Create a new square image and paste the resized one in its centre.
    canvas = Image.new('RGB', (desired_size, desired_size))
    canvas.paste(resized, ((desired_size - new_size[0]) // 2,
                           (desired_size - new_size[1]) // 2))
    return canvas


def _center_crop_to_square(img, desired_size: int):
    """Return the central ``desired_size`` x ``desired_size`` RGB region of ``img``."""
    # Convert first so palette / CMYK / alpha images keep their real colours.
    rgb = img.convert('RGB')
    # Images smaller than the crop window in either dimension are fitted and
    # padded to the target size instead of being cropped.
    if min(rgb.size) < desired_size:
        rgb = ImageOps.pad(rgb, (desired_size, desired_size))
    width, height = rgb.size
    left = (width - desired_size) // 2
    upper = (height - desired_size) // 2
    return rgb.crop((left, upper, left + desired_size, upper + desired_size))


def resize_pad_crop_image(input_path: str,
                          output_path: str,
                          desired_size: int,
                          mode: str,
                          ):
    """Write one image into the directory ``output_path`` under its original file name.

    Args:
        input_path: Path of the source image file.
        output_path: Existing directory that receives the result.
        desired_size: Edge length in pixels of the square output; used by the
            'pad' and 'crop' modes, ignored by 'move'.
        mode: One of
            'pad'  - scale the longer side to ``desired_size`` and centre the
                     image on a square RGB canvas;
            'crop' - cut out the central ``desired_size`` square (images that
                     are too small are padded to size instead);
            'move' - copy the file unchanged (the source file is kept).

    Returns:
        None. Zero-byte input files are reported and skipped. Image
        read/write failures in the 'pad' and 'crop' modes are printed and
        swallowed; errors raised while copying in 'move' mode propagate.

    Raises:
        ValueError: ``mode`` is not one of the supported values.
        AssertionError: ``input_path`` is not a file or ``output_path`` is not
            a directory.
    """
    if mode not in ('pad', 'crop', 'move'):
        raise ValueError(f'Unsupported mode: {mode!r}')

    input_path_ = Path(input_path)
    output_path_ = Path(output_path)
    # Raised explicitly (rather than via `assert`) so the checks also run
    # under `python -O` and carry a real message; the exception type is kept.
    if not input_path_.is_file():
        raise AssertionError(f'Supplied input path is not a file: {input_path_}')
    if not output_path_.is_dir():
        raise AssertionError('Supplied output path is not a directory:' + str(output_path_))

    if input_path_.stat().st_size <= 0:
        print(f'Filesize is 0, skipping file: {input_path_}')
        return

    filename = input_path_.name.replace('\n', '')
    full_output_path = output_path_ / filename

    if mode == 'move':
        shutil.copy(input_path, full_output_path)
        return

    try:
        # The context manager closes the underlying file handle once the
        # pixel data has been transformed into a new in-memory image.
        with Image.open(input_path) as img:
            if mode == 'pad':
                result = _pad_to_square(img, desired_size)
            else:
                result = _center_crop_to_square(img, desired_size)
        result.save(full_output_path)
    except OSError:
        print(f'Opening image failed: \n {traceback.format_exc()}')
# Distinct class labels found in the target column (np.unique returns them sorted)
label_folder_list = list(np.unique(targets_df[target_column_name].values))

# Per-label tally of files handled so far; every label starts at zero
counter = dict.fromkeys(label_folder_list, 0)

# Make sure one output sub-folder exists for each label
for folder in label_folder_list:
    Path(image_output_folder, str(folder)).mkdir(parents=True, exist_ok=True)

print('Resizing and moving files...')
def run(file):
    """Copy one input file into the output sub-folder of its class label.

    The label is looked up in ``targets_df`` by the file's base name. Files
    without a label entry are skipped silently, and nothing is copied for a
    label whose tally in ``counter`` has reached ``image_count_per_class``.

    Args:
        file: Path of the input file.

    Returns:
        None.

    NOTE(review): ``counter`` is a module-level dict. When this function is
    executed in separate worker processes, each process presumably updates its
    own copy, so the per-class cap would apply per worker - confirm.
    """
    filename = None
    try:
        # Nothing left to do once every class has reached its quota.
        if all(count >= image_count_per_class for count in counter.values()):
            return

        filename = Path(file).name
        label = targets_df.loc[filename, target_column_name]
        if counter[label] >= image_count_per_class:
            return

        # Built with pathlib so the separator matches the platform and the
        # folders created for each label.
        image_output_folder_with_label = str(Path(image_output_folder) / str(label))
        resize_pad_crop_image(file, image_output_folder_with_label, size_, mode='move')
        counter[label] += 1
    except KeyError:
        # Deliberate best-effort: a file with no label entry is skipped.
        # print(f'Label not found for file {file}, skipping!')
        pass
    except OSError:
        if filename is None:
            filename = file
        print(f'Skipping file {filename} due to OSError encountered: {traceback.format_exc()}, skipping!')
if __name__ == '__main__':
    # Entries located directly inside any directory whose path starts with
    # image_input_folder.
    filelist = glob.glob(f'{image_input_folder}*/*')
    # Hand the files to `run` through process_map: two workers, chunks of ten.
    r = process_map(
        run,
        filelist,
        max_workers=2,
        chunksize=10,
    )