forked from bacnguyencong/otoliths-identification
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess.py
157 lines (112 loc) · 4.01 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# coding: utf-8
# # Otoliths predictions
# Segment all images
from skimage.io import imread, imsave
from util.useful_imports import *
# # Data processing
# ## 1. Partitioning data into training and test
# ### 1.1. Segment the training
# Start from an empty REF_SEG_DIR so crops from an earlier run cannot mix in.
if os.path.exists(REF_SEG_DIR):
    shutil.rmtree(REF_SEG_DIR)
os.makedirs(REF_SEG_DIR)

# Segmentation settings.
# NOTE(review): `threshold` is assigned here but is not passed to
# ut.segment_image below, whereas the disabled test-data section later in
# this file does pass it. The helper's signature is not visible from this
# script -- confirm which call matches it.
threshold = 0.25
remove_bg = False
conv_sigma = 2.0
opening_size = 30

# Every sub-directory of ROOT_DIR is handled as one label; its .jpg images are
# segmented and each kept region is saved as a crop under REF_SEG_DIR/<label>.
for label in os.listdir(ROOT_DIR):
    cur_dir = os.path.join(ROOT_DIR, label)
    # os.listdir also returns plain files. Skip them so that a stray file
    # does not produce an empty label directory that the train/valid
    # partition would then copy as if it were a class.
    if not os.path.isdir(cur_dir):
        continue

    tar_dir = os.path.join(REF_SEG_DIR, label)
    if not os.path.exists(tar_dir):
        os.makedirs(tar_dir)

    print(label)
    for image_path in glob.glob(cur_dir + '/*.jpg'):
        image = imread(image_path)
        # Output name template "<name without .jpg>_{}.jpg"; the placeholder
        # receives the 1-based index of the region within this image.
        name_template = os.path.basename(image_path)[:-4] + '_{}.jpg'
        regions = ut.segment_image(image, remove_bg, conv_sigma,
                                   opening_size)
        for i, reg in enumerate(regions):
            # Crop the image to the region's bounding box.
            min_row, min_col, max_row, max_col = reg.bbox
            segm_im = image[min_row:max_row, min_col:max_col]
            # Crops with fewer than 20 * 8192 array elements are treated as
            # noise and dropped.
            if segm_im.size / 8192 < 20.0:
                continue
            # A 1536 x 2048 crop is treated as a failed segmentation
            # (presumably the bounding box covers the whole frame -- confirm
            # against the acquisition resolution).
            if segm_im.shape[0] == 1536 and segm_im.shape[1] == 2048:
                continue
            imsave(os.path.join(tar_dir, name_template.format(i + 1)),
                   segm_im)
# Partition the segmented crops into training and validation sets.
# Both output directories are recreated from scratch on every run.
for out_dir in (TRAIN_DIR, VALID_DIR):
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

train_per = 0.9  # fraction of each label's crops that goes to training
rand = np.random.RandomState(123)  # fixed seed for the shuffles below

# os.listdir and glob.glob return entries in arbitrary, platform-dependent
# order. Both listings are sorted so that the seeded permutations always act
# on the same sequence and the split is reproducible between runs.
for dire in sorted(os.listdir(REF_SEG_DIR)):
    # Per-label destination directories.
    p1 = os.path.join(TRAIN_DIR, dire)
    if not os.path.exists(p1):
        os.makedirs(p1)
    p2 = os.path.join(VALID_DIR, dire)
    if not os.path.exists(p2):
        os.makedirs(p2)

    img_list = sorted(glob.glob(os.path.join(REF_SEG_DIR, dire) + '/*.jpg'))
    n = len(img_list)
    rp = rand.permutation(n)
    # Number of crops assigned to the training set for this label.
    train = math.floor(train_per * n)

    for i, j in enumerate(rp):
        filepath = img_list[j]
        filename = os.path.basename(filepath)
        # Labels with at most 5 crops are copied to BOTH sets; otherwise the
        # first `train` shuffled crops go to training and the rest to
        # validation.
        if i < train or n <= 5:
            copyfile(filepath, os.path.join(p1, filename))
        if i >= train or n <= 5:
            copyfile(filepath, os.path.join(p2, filename))

# The intermediate segmentation directory is no longer needed.
shutil.rmtree(REF_SEG_DIR)
# ## 2. Create test data
# NOTE: everything between the triple quotes below is a bare string literal,
# so none of it is executed; the test-data section is effectively disabled.
# As written, the disabled code gathers image paths from the sub-directories
# of each entry of SAMPLE_DIR (.jpg files plus selected .tif files), recreates
# TEST_DIR, and saves one bounding-box crop per segmented region into it.
# NOTE(review): it calls ut.segment_image with a `threshold` argument that the
# active training-segmentation call earlier in this file omits -- confirm the
# helper's signature before re-enabling this section.
# NOTE(review): `name + '.jpg' not in jpg_list` compares a bare file name with
# the paths returned by glob (which include the directory), so the check looks
# like it is always true and every .tif would be appended -- verify.
"""
# Prepare a list of test images, containing all paths
img_list = []
for dire in os.listdir(SAMPLE_DIR):
my_dir = os.path.join(SAMPLE_DIR, dire)
for subdir in os.listdir(my_dir):
cur_dir = os.path.join(my_dir, subdir)
if not os.path.isdir(cur_dir):
continue
# print(cur_dir)
jpg_list = glob.glob(cur_dir + '/*.jpg')
tif_list = glob.glob(cur_dir + '/*.tif')
img_list.extend(jpg_list)
for tif_file in tif_list:
name = os.path.basename(tif_file)
name = name[0:-4]
# check if format .jpg does not exist in the list
if name + '.jpg' not in jpg_list:
img_list.append(tif_file)
print("Segmenting the test data")
if os.path.exists(TEST_DIR):
shutil.rmtree(TEST_DIR)
os.makedirs(TEST_DIR)
threshold = 0.25
remove_bg = False
conv_sigma = 2.0
opening_size = 30
for image_path in img_list:
image = imread(image_path)
images_names = os.path.basename(image_path)
images_names = images_names[0:-4] + '_{}.jpg'
regions = ut.segment_image(image, remove_bg, threshold, conv_sigma,
opening_size)
for i, reg in enumerate(regions):
if len(images_names):
min_row, min_col, max_row, max_col = reg.bbox
segm_im = image[min_row:max_row][:,min_col:max_col]
imsave(os.path.join(TEST_DIR, images_names.format(i+1)), segm_im)
"""