-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrelocalization.py
371 lines (301 loc) · 19.6 KB
/
relocalization.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
import os
import cv2
import json
import time
import torch
from tqdm import tqdm
import argparse
import numpy as np
from pathlib import Path
from scipy.spatial.transform import Rotation as R
import general_utils
import database_utils as db
import visualization_utils
import localization_database_utils
# Helper functions
def invert_extrinsic_matrix(extrinsic_rot, extrinsic_t, global_scale=1):
    """
    Invert a rigid extrinsic transform to obtain T^(world)_(camera).

    The rotation and translation are packed into a 4x4 homogeneous matrix
    [R | t]; the result is [R^T | -R^T t / global_scale], i.e. only the
    inverted translation is divided by global_scale.

    :param extrinsic_rot: rotation, written into the upper-left 3x3 block
    :param extrinsic_t: translation, written into the first three rows of the last column
    :param global_scale: divisor applied to the inverted translation
    :return: the inverted 4x4 matrix, passed through general_utils.type_float_and_reshape
    """
    # Homogeneous form of the input transform (bottom row is [0, 0, 0, 1]).
    forward = np.eye(4)
    forward[:3, :3] = extrinsic_rot
    forward[:3, 3] = extrinsic_t

    # For a rigid transform the inverse rotation is simply the transpose.
    rotation_t = forward[:3, :3].T

    inverse = np.eye(4)
    inverse[:3, :3] = rotation_t
    inverse[:3, 3] = (-rotation_t @ forward[:3, 3]) / global_scale
    return general_utils.type_float_and_reshape(inverse, (4, 4))
def adaptative_pose_estimation_PnP(image_name, point_responses, points_3d, points_2d,
                                   response_threshold_interval, threshold_step, point_number_interval,
                                   reprojection_error, point_info_dict, pose_save_dict,
                                   camera_mtx=None, dist_coeffs=None):
    """
    Solves PnP problem from a set of corresponding 2D and 3D points.

    The response threshold starts at the upper bound of
    response_threshold_interval and is lowered by threshold_step until the
    number of correspondences with response >= threshold lies inside
    point_number_interval, or the threshold has reached the lower bound.
    PnP (RANSAC + EPnP) is only attempted when the point count is in range.

    :param image_name: key under which results are stored in both dictionaries
    :param point_responses: per-correspondence responses (cast to float64 for thresholding)
    :param points_3d: 3D points, one row per correspondence
    :param points_2d: 2D points, one row per correspondence
    :param response_threshold_interval: [lower bound, upper bound] of the threshold search
    :param threshold_step: decrement applied to the threshold on every iteration;
        must be positive, otherwise the search loop may never terminate
    :param point_number_interval: [minimum, maximum] accepted number of correspondences
    :param reprojection_error: forwarded to cv2.solvePnPRansac as reprojectionError
    :param point_info_dict: updated in place on success with
        (number of points used, number of inliers, final threshold)
    :param pose_save_dict: updated in place on success with the rotation and
        translation of the inverted extrinsic matrix
    :param camera_mtx: camera matrix forwarded to cv2.solvePnPRansac
    :param dist_coeffs: distortion coefficients; a (5, 1) zero array when None
    :return: (ret, point_info_dict, pose_save_dict); ret is None on every
        failure path and the truthy cv2 return value on success
    """
    if points_2d.shape[0] != points_3d.shape[0]:
        print("Equal number of initial object and image points needed")
        # Same failure contract as the other paths: callers test `ret is None`
        # and rebind their dictionaries from the returned values, so the
        # dictionaries must be handed back untouched rather than replaced.
        return None, point_info_dict, pose_save_dict

    min_threshold = response_threshold_interval[0]
    min_points, max_points = point_number_interval[0], point_number_interval[1]

    # Cast once; the original re-cast the responses on every loop iteration.
    responses = point_responses.astype(np.float64)

    eval_response_threshold = response_threshold_interval[1]
    while True:
        projection_indexes = np.where(responses >= eval_response_threshold)
        count_in_range = min_points <= len(projection_indexes[0]) <= max_points
        if count_in_range or eval_response_threshold <= min_threshold:
            break
        eval_response_threshold -= threshold_step

    if not count_in_range:
        return None, point_info_dict, pose_save_dict

    points_2d = points_2d[projection_indexes]
    points_3d = points_3d[projection_indexes]
    if dist_coeffs is None:
        dist_coeffs = np.zeros((5, 1))

    points_2d = points_2d.reshape((points_2d.shape[0], 1, 2)).astype(np.float32)
    points_3d = points_3d.reshape((points_3d.shape[0], 1, 3)).astype(np.float64)
    ret, rot, t, inliers = cv2.solvePnPRansac(objectPoints=points_3d, imagePoints=points_2d,
                                              cameraMatrix=camera_mtx, distCoeffs=dist_coeffs,
                                              reprojectionError=reprojection_error, flags=cv2.SOLVEPNP_EPNP)
    if not ret:
        return None, point_info_dict, pose_save_dict

    # solvePnPRansac returns a rotation vector; convert it to a rotation matrix.
    orientation = R.from_rotvec(rot.reshape((3))).as_matrix()
    inverted_extrinsic = invert_extrinsic_matrix(extrinsic_rot=orientation, extrinsic_t=t.reshape((3)))

    inlier_count = 0 if inliers is None else len(inliers)
    point_info_dict[image_name] = (points_2d.shape[0], inlier_count, eval_response_threshold)
    pose_save_dict[image_name] = (inverted_extrinsic[:3, :3], inverted_extrinsic[:3, 3])
    return ret, point_info_dict, pose_save_dict
# Main functions
def image_based_localize_and_compute_correspondences(args, query_image_path, model,
                                                     preop_feature_array, preop_world_coordinates,
                                                     preop_image_names, preop_image_coordinates,
                                                     undistorted_mask_boundary, undistorted_mask_boundary_path,
                                                     localization_db_path):
    """
    Performs Keypoint Relocalization on selected intraoperative image.

    Computes the response of every pre-operative feature on the query image,
    keeps the correspondences whose response exceeds
    args.minimum_response_threshold and whose 2D location falls on a non-zero
    pixel of undistorted_mask_boundary, and stores them in the localization
    database under the query image's base name.

    :param args: namespace providing image_downsampling, minimum_response_threshold and gpu_id
    :param query_image_path: path of the query image
    :param model: descriptor model forwarded to general_utils.image_dataloader
    :param preop_feature_array: numpy array of pre-operative features (wrapped with torch.from_numpy)
    :param preop_world_coordinates: per-feature 3D points, indexed like the response rows
    :param preop_image_names: per-feature source image names, indexed like the response rows
    :param preop_image_coordinates: per-feature source image coordinates, indexed like the response rows
    :param undistorted_mask_boundary: mask indexed as [column 1 of response, column 2 of response]
    :param undistorted_mask_boundary_path: mask path forwarded to the dataloader
    :param localization_db_path: database that receives the correspondences
    """
    query_feature_map, query_starts_h, query_starts_w = \
        general_utils.image_dataloader(args=args, model=model, query_image_path=query_image_path,
                                       undistorted_mask_boundary_path=undistorted_mask_boundary_path)
    response_array = general_utils.feature_localization(query_feature_map=query_feature_map,
                                                        preop_feature_array=torch.from_numpy(preop_feature_array),
                                                        gpu_id=args.gpu_id)
    # Release the feature map as soon as the responses are available.
    del query_feature_map

    # Columns 1 and 2 hold the match location; shift by the crop offsets and
    # rescale by the downsampling factor.
    response_array[:, 1] = args.image_downsampling * (response_array[:, 1] + query_starts_w)
    response_array[:, 2] = args.image_downsampling * (response_array[:, 2] + query_starts_h)

    # Disregard low convolution response correspondences
    projection_indexes = np.where(response_array[:, 0] > args.minimum_response_threshold)
    # `np.int` was removed in NumPy 1.24; it was an alias of the builtin `int`.
    points2d = response_array[:, 1:3][projection_indexes].astype(int)
    points3d = preop_world_coordinates[projection_indexes]
    point_responses = response_array[:, 0][projection_indexes]
    filtered_preop_image_names = preop_image_names[projection_indexes]
    filtered_preop_image_coordinates = preop_image_coordinates[projection_indexes]

    # Mask correspondences outside endoscope's FoV
    first_axis, second_axis = points2d[:, 0], points2d[:, 1]
    # Points outside the mask array are rejected explicitly: a negative index
    # would silently wrap around and an over-range one would raise IndexError.
    in_bounds = ((first_axis >= 0) & (first_axis < undistorted_mask_boundary.shape[0]) &
                 (second_axis >= 0) & (second_axis < undistorted_mask_boundary.shape[1]))
    valid_indexes = np.flatnonzero(in_bounds)
    inside_mask = undistorted_mask_boundary[first_axis[valid_indexes], second_axis[valid_indexes]] > 0
    valid_indexes = valid_indexes[inside_mask]

    points2d_in = points2d[valid_indexes]
    points3d_in = points3d[valid_indexes]
    point_responses_in = point_responses[valid_indexes]
    filtered_preop_image_names = filtered_preop_image_names[valid_indexes]
    filtered_preop_image_coordinates = filtered_preop_image_coordinates[valid_indexes]

    localization_database_utils.insert_image_correspondences(db_path=localization_db_path,
                                                             image_name=os.path.basename(query_image_path),
                                                             responses=point_responses_in,
                                                             points_2d=points2d_in,
                                                             points_3d=points3d_in,
                                                             preop_image_names=filtered_preop_image_names,
                                                             preop_image_coords=filtered_preop_image_coordinates)
def localize_correspondences(args):
    """
    Performs Keypoint Relocalization over all intraoperative images and obtains 2D-3D correspondences.

    Creates <base_dir>/<exp_description>/localization.db, records the
    command-line arguments next to it, loads the pre-operative feature
    database and the descriptor model, and processes every
    args.query_subsampling-th query image.

    :param args: parsed command-line namespace (see the argument parser of this script)
    :raises FileNotFoundError: if the undistorted mask image cannot be read
    """
    start = time.process_time()

    # Definition of query image list
    query_image_names = general_utils.get_all_color_image_names_in_sequence(Path(args.query_sequence_root))
    query_image_names = [str(j) for j in query_image_names][::args.query_subsampling]

    # Reads base and query sequence database file
    dreco_db_path = \
        os.path.join(args.sequence_root, "dreco_base_{}_fill_{}.db".format(args.matching_model_description,
                                                                          args.feature_model_description))
    rdb = db.ReconstrunctionDB(db_path=dreco_db_path)

    # Correspondence saving directory handling
    SAVE_DIR = os.path.join(args.base_dir, args.exp_description)
    # exist_ok avoids the race between checking for and creating the directory.
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Correspondence database creation and setup
    localization_db_path = os.path.join(SAVE_DIR, "localization.db")
    localization_database_utils.create_localization_database(db_file_path=localization_db_path)
    # NOTE(review): `conn` is never closed here; confirm whether
    # create_table() releases it or whether it should be closed explicitly.
    conn = localization_database_utils.create_connection(db_file=localization_db_path)
    localization_database_utils.create_table(conn=conn)

    # Saving commandline args for future lookup
    with open(os.path.join(SAVE_DIR, "commandline_args"), 'w') as f:
        f.write("script: {}".format(str(os.path.realpath(__file__))))
        json.dump(args.__dict__, f, indent=2)

    # Retrieval of feature database
    print("Retrieving pre-operative feature and 3D point database...")
    preop_image_names, preop_image_coordinates, preop_feature_array, preop_world_coordinates = \
        rdb.get_all_point3d_features()
    original_feat_number = preop_feature_array.shape[0]
    print("Retrieved {} features in database!".format(original_feat_number))

    # Imports mask for point correspondence rejection outside endoscope's FoV
    undistorted_mask_boundary_path = os.path.join(args.sequence_root, "undistorted_mask.bmp")
    undistorted_mask_boundary = cv2.imread(undistorted_mask_boundary_path, cv2.IMREAD_GRAYSCALE)
    if undistorted_mask_boundary is None:
        # cv2.imread signals a missing or unreadable file by returning None
        # instead of raising; fail here with the offending path.
        raise FileNotFoundError("Could not read undistorted mask: {}".format(undistorted_mask_boundary_path))
    # Transposed so the mask is indexed in the same axis order as the 2D
    # match locations used during correspondence filtering.
    undistorted_mask_boundary = undistorted_mask_boundary.T

    # Erode the boundary to remove near-boundary matches
    kernel = np.ones((5, 5), np.uint8)
    undistorted_mask_boundary = cv2.erode(undistorted_mask_boundary, kernel, iterations=args.erosion_iterations)

    # Loads dense descriptor network for correspondence compute
    model = general_utils.load_pretrained_model(feature_descriptor_model_path=Path(args.trained_model_path),
                                                filter_growth_rate=args.filter_growth_rate,
                                                feature_length=args.feature_length,
                                                gpu_id=args.gpu_id)

    print("Feature localization and correspondence computing...")
    # The context manager closes the progress bar even if an image fails.
    with tqdm(total=len(query_image_names)) as tq:
        for query_image_path in query_image_names:
            query_image_index = os.path.basename(query_image_path).split(".")[0]
            tq.set_description("Query image index: {}".format(int(query_image_index)))
            image_based_localize_and_compute_correspondences(
                args=args, query_image_path=query_image_path, model=model,
                preop_feature_array=preop_feature_array,
                preop_world_coordinates=preop_world_coordinates,
                preop_image_names=preop_image_names,
                preop_image_coordinates=preop_image_coordinates,
                undistorted_mask_boundary=undistorted_mask_boundary,
                undistorted_mask_boundary_path=undistorted_mask_boundary_path,
                localization_db_path=localization_db_path)
            tq.update(1)

    # NOTE: process_time() reports CPU time of this process, not wall-clock time.
    print("Feature localization for {} images in {} seconds".format(len(query_image_names),
                                                                   time.process_time() - start))
def correct_points_and_solve_PnP(args):
    """
    Solves PnP problem with 2D-3D correspondences and performs pose estimation.

    Reads the correspondences stored in
    <base_dir>/<exp_description>/localization.db, estimates one pose per
    query image and writes the results into
    <base_dir>/<save_exp_description>/ as "raw_query_pose_estimations.pth"
    and "point_number_dict.pth". Results are checkpointed during the loop and
    written once more after it, so the files on disk always reflect every
    successfully processed image.

    :param args: parsed command-line namespace (see the argument parser of this script)
    """
    start = time.process_time()

    # PnP saving directory handling (branch from correspondence saving directory)
    SAVE_DIR = os.path.join(args.base_dir, args.save_exp_description)
    os.makedirs(SAVE_DIR, exist_ok=True)
    pose_save_path = os.path.join(SAVE_DIR, "raw_query_pose_estimations.pth")
    point_info_save_path = os.path.join(SAVE_DIR, "point_number_dict.pth")

    # Saving commandline args for future lookup
    with open(os.path.join(SAVE_DIR, "commandline_args"), 'w') as f:
        f.write("script: {}".format(str(os.path.realpath(__file__))))
        json.dump(args.__dict__, f, indent=2)

    # Reads database files
    dreco_db_path = \
        os.path.join(args.sequence_root, "dreco_base_{}_fill_{}.db".format(args.matching_model_description,
                                                                          args.feature_model_description))
    rdb = db.ReconstrunctionDB(db_path=dreco_db_path)
    localization_db_path = os.path.join(args.base_dir, args.exp_description, "localization.db")
    query_image_names = localization_database_utils.retrieve_query_image_names(db_path=localization_db_path)
    query_image_names = [i[0] for i in query_image_names]

    # Extracts camera intrinsic parameters from base reconstruction (given by COLMAP)
    camera_params = rdb.get_camera_params()
    camera_params = camera_params[0]
    camera_mtx = np.array([[camera_params[0], 0, camera_params[2]],
                           [0, camera_params[1], camera_params[3]],
                           [0, 0, 1]])

    pose_save_dict, point_info_dict = {}, {}
    print("Accumulating correspondences and solving PnP...")
    with tqdm(total=len(query_image_names)) as tq:
        for j, query_image_path in enumerate(query_image_names):
            query_img_idx = int(query_image_path.split(".")[0])
            tq.set_description("query_img_idx: {}".format(query_img_idx))
            point_responses, points2d, points3d, preop_image_names, preop_points2d = \
                localization_database_utils.retrieve_image_correspondences(db_path=localization_db_path,
                                                                           query_image_name=query_image_path)
            ret, updated_point_info, updated_poses = \
                adaptative_pose_estimation_PnP(image_name=query_image_path,
                                               point_responses=point_responses,
                                               points_3d=points3d, points_2d=points2d,
                                               response_threshold_interval=[args.minimum_response_threshold,
                                                                            args.maximum_response_threshold],
                                               threshold_step=args.response_threshold_step,
                                               point_number_interval=[args.minimum_point_number,
                                                                      args.maximum_point_number],
                                               reprojection_error=args.reprojection_error,
                                               point_info_dict=point_info_dict, pose_save_dict=pose_save_dict,
                                               camera_mtx=camera_mtx)
            # Any falsy status is a failure; the accumulators are only
            # rebound on success so a failed image can never replace them.
            if ret:
                point_info_dict, pose_save_dict = updated_point_info, updated_poses

                # Periodic checkpoint so a crash does not lose all results
                if j % 2 == 0:
                    torch.save(pose_save_dict, pose_save_path)
                    torch.save(point_info_dict, point_info_save_path)

                # Visualization of point correspondences
                if args.extra_visualizations:
                    # Keep the responses at or above the selected threshold,
                    # i.e. exactly the points that were handed to PnP (>=).
                    selected_indexes = np.where(point_responses.astype(np.float64) >=
                                                point_info_dict[query_image_path][2])
                    points2d_in = points2d[selected_indexes]
                    preop_image_names_in = preop_image_names[selected_indexes]
                    preop_points2d_in = preop_points2d[selected_indexes]
                    query_image_path_complete = os.path.join(args.query_sequence_root, "images", query_image_path)
                    visualization_utils.display_point_feature_mapping(
                        graph_preop_image_names=preop_image_names_in,
                        graph_preop_image_coordinates=preop_points2d_in.astype(np.float32),
                        query_image_path=query_image_path_complete,
                        graph_query_image_coordinates=points2d_in.astype(np.float32),
                        args=args)
                    visualization_utils.draw_correspondences(
                        img_path=query_image_path_complete,
                        img_pts=points2d_in.astype(np.float32),
                        base_dir=os.path.join(args.base_dir, args.save_exp_description, "images_keypoints"))
            tq.update(1)

    # Final save: previously results were only written on a successful even
    # or last iteration, so poses from trailing odd-index images were lost
    # whenever the last image failed.
    torch.save(pose_save_dict, pose_save_path)
    torch.save(point_info_dict, point_info_save_path)

    print("Pose estimation for {} images in {} seconds ".format(len(pose_save_dict.keys()),
                                                                round(time.process_time() - start, 3)))
if __name__ == "__main__":
    # Command-line entry point: parses the arguments and runs the
    # correspondence localization stage, the PnP stage, or both.
    parser = argparse.ArgumentParser(description="Main camera relocalization",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Path settings
    parser.add_argument("--sequence_root", type=str, required=True,
                        help="Directory root of the preoperative video sequence")
    parser.add_argument("--query_sequence_root", type=str, required=True,
                        help="Directory root of the intraoperative video sequence")
    parser.add_argument("--exp_description", type=str, required=True,
                        help="Experiment description for file reading purposes")
    parser.add_argument("--save_exp_description", type=str, required=True,
                        help="Experiment description for saving purposes")
    parser.add_argument("--base_dir", type=str, required=True,
                        help="Save base directory")
    # Dense descriptor model arguments
    parser.add_argument("--image_downsampling", type=float, default=4.0,
                        help="Input image downsampling rate")
    parser.add_argument("--trained_model_path", type=str, required=True,
                        help="Path to the trained descriptor model")
    parser.add_argument("--feature_length", type=int, default=128,
                        help="Length of point descriptors (depends on output channel dimension of network)")
    parser.add_argument("--filter_growth_rate", type=int, default=10,
                        help="Filter growth rate of network")
    # General relocalization arguments
    parser.add_argument("--mode", type=str, choices=["localize", "pnp", "all"], required=True,
                        help="Process step(s) to perform")
    parser.add_argument("--gpu_id", type=int, default=0,
                        help="GPU ID for descriptor matching stage")
    # The following options carry defaults, so they are not marked as
    # required: `required=True` made the declared defaults unreachable.
    # Command lines that pass them explicitly behave exactly as before.
    parser.add_argument("--query_subsampling", type=int, default=3,
                        help="Subsampling rate for query sequence in Keypoint Relocalization stage")
    parser.add_argument("--reprojection_error", type=float, default=8.0,
                        help="Maximum allowed distance between observed/computed point projections to consider inliers")
    parser.add_argument("--minimum_response_threshold", type=float, default=0.7,
                        help="Lower bound in response threshold dynamic method")
    parser.add_argument("--maximum_response_threshold", type=float, default=1.0,
                        help="Upper bound in response threshold dynamic method")
    parser.add_argument("--response_threshold_step", type=float, default=0.001,
                        help="Step size in response threshold dynamic method")
    parser.add_argument("--minimum_point_number", type=int, default=10,
                        help="Minimum number of correspondences")
    parser.add_argument("--maximum_point_number", type=int, default=100,
                        help="Maximum number of correspondences")
    parser.add_argument('--matching_model_description', type=str, required=True,
                        help='Description of model providing base for matching')
    parser.add_argument('--feature_model_description', type=str, required=True,
                        help='Description of model providing features to fill')
    parser.add_argument('--extra_visualizations', action='store_true',
                        help='Save additional visualizations (not strictly necessary for reloc. process)')
    parser.add_argument("--erosion_iterations", type=int, default=10,
                        help="Number of erosion iterations to avoid near-boundary matches")
    args = parser.parse_args()

    # Dispatch on the values declared in `choices`. The PnP-only branch used
    # to compare against "pnp_eval", which the parser never accepts, so
    # `--mode pnp` silently re-ran the localization stage as well.
    if args.mode == "pnp":
        correct_points_and_solve_PnP(args=args)
    elif args.mode == "localize":
        localize_correspondences(args=args)
    else:
        localize_correspondences(args=args)
        correct_points_and_solve_PnP(args=args)