depthToPointCloud.py (forked from niconielsen32/ComputerVision)
import cv2
import torch
import time
import numpy as np

# Q matrix - Camera parameters - Can also be found using stereoRectify
Q = np.array(([1.0, 0.0, 0.0, -160.0],
              [0.0, 1.0, 0.0, -120.0],
              [0.0, 0.0, 0.0, 350.0],
              [0.0, 0.0, 1.0/90.0, 0.0]), dtype=np.float32)
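
# A minimal sketch of obtaining Q from a calibrated stereo rig instead, via
# cv2.stereoRectify; K1, D1, K2, D2, R, T and (width, height) are placeholders
# for your own calibration data, not values used in this script:
#
#   R1, R2, P1, P2, Q, roi1, roi2 = cv2.stereoRectify(
#       K1, D1, K2, D2, (width, height), R, T)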

# Load a MiDaS model for depth estimation
model_type = "DPT_Large" # MiDaS v3 - Large (highest accuracy, slowest inference speed)
#model_type = "DPT_Hybrid" # MiDaS v3 - Hybrid (medium accuracy, medium inference speed)
#model_type = "MiDaS_small" # MiDaS v2.1 - Small (lowest accuracy, highest inference speed)
midas = torch.hub.load("intel-isl/MiDaS", model_type)

# Move model to GPU if available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
midas.to(device)
midas.eval()

# Load transforms to resize and normalize the image
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

# Open up the video capture from a webcam (device index 2; adjust for your setup)
cap = cv2.VideoCapture(2)

while cap.isOpened():
    success, img = cap.read()
    if not success:
        break

    start = time.time()

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Apply input transforms
    input_batch = transform(img).to(device)

    # Prediction and resize to original resolution
    with torch.no_grad():
        prediction = midas(input_batch)
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=img.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze()

    depth_map = prediction.cpu().numpy()

    # MiDaS outputs relative inverse depth; normalize it to [0, 1]
    depth_map = cv2.normalize(depth_map, None, 0, 1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
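
    # For reference, cv2.reprojectImageTo3D treats depth_map as a disparity
    # image d and computes, per pixel (x, y):
    #   [X, Y, Z, W]^T = Q @ [x, y, d, 1]^T  ->  (X/W, Y/W, Z/W)
    # With the Q above this gives Z = 350 / (d / 90), so larger (nearer)
    # normalized values end up closer to the camera.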
    # Reproject points into 3D
    points_3D = cv2.reprojectImageTo3D(depth_map, Q, handleMissingValues=False)

    # Keep only points above the depth threshold (drops the distant background)
    mask_map = depth_map > 0.4

    # Mask colors and points (img is still RGB here, matching the PLY layout)
    output_points = points_3D[mask_map]
    output_colors = img[mask_map]

    end = time.time()
    totalTime = end - start
    fps = 1 / totalTime

    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    depth_map = (depth_map * 255).astype(np.uint8)
    depth_map = cv2.applyColorMap(depth_map, cv2.COLORMAP_MAGMA)

    cv2.putText(img, f'FPS: {int(fps)}', (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)
    cv2.imshow('Image', img)
    cv2.imshow('Depth Map', depth_map)

    if cv2.waitKey(5) & 0xFF == 27:
        break

# --------------------- Create The Point Clouds ----------------------------------------

# Function to create point cloud file
def create_output(vertices, colors, filename):
    colors = colors.reshape(-1, 3)
    vertices = np.hstack([vertices.reshape(-1, 3), colors])

    ply_header = '''ply
format ascii 1.0
element vertex %(vert_num)d
property float x
property float y
property float z
property uchar red
property uchar green
property uchar blue
end_header
'''
    with open(filename, 'w') as f:
        f.write(ply_header % dict(vert_num=len(vertices)))
        np.savetxt(f, vertices, '%f %f %f %d %d %d')
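
# Each data row written by np.savetxt has the form "<x> <y> <z> <r> <g> <b>",
# for example "0.123456 -0.654321 2.000000 255 128 0" (illustrative values),
# matching the property order declared in the PLY header.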

output_file = 'pointCloudDeepLearning.ply'

# Generate point cloud from the last processed frame
create_output(output_points, output_colors, output_file)

cap.release()
cv2.destroyAllWindows()
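
# --------------------- Optional: View The Saved Cloud ---------------------------------
# A minimal viewer sketch, assuming Open3D is installed (`pip install open3d`);
# the .ply file also opens in tools such as MeshLab or CloudCompare.
try:
    import open3d as o3d
    pcd = o3d.io.read_point_cloud(output_file)
    o3d.visualization.draw_geometries([pcd])
except ImportError:
    pass  # Open3D not available; skip visualization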