-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_dataset.py
62 lines (50 loc) · 2.22 KB
/
generate_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from utils import *
import pathlib
from threading import Thread
# Used by generate_dataset to size the sliding window: step = (DIM - 1) * CQPP.
# Presumably the tile side length in pixels -- TODO confirm.
DIM = 512
# Root directory under which every input and output path in this file is built.
# BASE_DIR is not defined in this file; presumably it is provided by the
# `from utils import *` line above -- verify.
DIR = BASE_DIR
# "Coordinate quantum per pixel" (per the comment in generate_dataset);
# multiplied by a pixel count to obtain a length in coordinate units.
CQPP = 38.2185141426
def thread_function(starting_image_path, full_name, t, x, y, step, lake_geom):
    """Produce the tile (and, outside the test split, its label) for one window.

    The outputs are written under ``DIR/<t>_image`` and ``DIR/<t>_label`` with
    the file name ``<full_name>_<x>_<y>.tif``.

    Args:
        starting_image_path: Path of the source image handed to generate_image.
        full_name: Source file name, reused as the prefix of the tile name.
        t: Split name; the label step is skipped when it equals "test".
        x: First window coordinate, passed to get_square and embedded in the
            tile name.
        y: Second window coordinate, passed to get_square and embedded in the
            tile name.
        step: Window size passed to get_square.
        lake_geom: Geometry selection forwarded to generate_label.

    NOTE(review): generate_image, get_square and generate_label are not defined
    in this file; presumably they crop the source image to the window and
    rasterize the lake geometry onto the resulting tile -- confirm in utils.
    """
    tile_name = full_name + f"_{x}_{y}.tif"
    image_path = DIR / (t + "_image") / tile_name
    label_path = DIR / (t + "_label") / tile_name

    window = get_square(x, y, step)
    generate_image(starting_image_path, window, image_path)

    # The test split gets images only.
    if t == "test":
        return
    generate_label(image_path, lake_geom, label_path)
def generate_dataset(max_threads=32):
    """Tile every source image of the train and test splits.

    For each file listed in ``DIR/train`` and ``DIR/test`` a window is slid
    over the bounding box of the file's region and ``thread_function`` is run
    once per window position, each call in its own thread.

    Args:
        max_threads: Upper bound on the number of tile threads alive at the
            same time. Threads are started and joined in batches of this
            size, so a region with thousands of window positions no longer
            starts thousands of threads at once.

    Raises:
        ValueError: If ``max_threads`` is smaller than 1.

    NOTE(review): get_image_and_region and get_external_rectangle are not
    defined in this file; presumably the former parses the image id and region
    number out of the file name and the latter returns the region's bounding
    polygon(s) -- confirm in utils.
    """
    if max_threads < 1:
        raise ValueError("max_threads must be at least 1")

    # mkdir(exist_ok=True) replaces the check-then-create pair, which could
    # fail if the directory appeared between the check and the mkdir call.
    for subdir in ("train_image", "train_label", "test_image"):
        pathlib.Path(DIR / subdir).mkdir(exist_ok=True)

    regions = gp.read_file(DIR / "lakes_regions.gpkg")
    lakes_regions = gp.read_file(DIR / "lake_polygons_training.gpkg")

    # coordinate quantum per pixel is 38.2185141426 (CQPP), so this is the
    # window size expressed in coordinate units; it is the same for every image.
    step = (DIM - 1) * CQPP

    for t in ("train", "test"):
        for full_name in os.listdir(DIR / t):
            image, region_num = get_image_and_region(full_name)
            same_image = lakes_regions["image"] == image
            same_region = lakes_regions["region_num"] == region_num
            lake_geom = lakes_regions[same_image & same_region]["geometry"]

            # sliding window to crop big image: take the extent of the first
            # geometry returned for this region.
            big_rect = get_external_rectangle(regions, region_num)
            xx, yy = big_rect.iloc[0].exterior.coords.xy
            xmin, xmax = min(xx), max(xx)
            ymin, ymax = min(yy), max(yy)

            starting_image_path = DIR / t / full_name
            # The window origin advances by half a step along both axes.
            thread_list = [
                Thread(
                    target=thread_function,
                    args=(starting_image_path, full_name, t, x, y, step, lake_geom),
                )
                for x in np.arange(xmin, xmax, step / 2)
                for y in np.arange(ymin, ymax, step / 2)
            ]

            # Start and join in bounded batches; Thread objects that have not
            # been started yet do not hold an OS thread.
            for first in range(0, len(thread_list), max_threads):
                batch = thread_list[first:first + max_threads]
                for thread in batch:
                    thread.start()
                for thread in batch:
                    thread.join()
# Build the dataset only when this file is executed as a script, not on import.
if __name__ == "__main__":
    generate_dataset()