Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

estimate_calibrated_two_view_geometry(): incompatible function arguments #292

Open
mganglb opened this issue Nov 11, 2024 · 1 comment
Open

Comments

@mganglb
Copy link

mganglb commented Nov 11, 2024

Hi,

  • Win11
  • pycolmap 0.6 (since there is a known problem with pycolmap 3.10.0 on Windows)

I am having problems using estimate_calibrated_two_view_geometry and estimate_two_view_geometry_pose. Annoyingly, I can't figure out where my error is. Estimating the essential matrix with the same arguments works without problems (see also the example script).

Thanks in advance.
Markus

TypeError: estimate_calibrated_two_view_geometry(): incompatible function arguments. The following argument types are supported:
    1. (camera1: pycolmap.Camera, points1: numpy.ndarray[numpy.float64[m, 2]], camera2: pycolmap.Camera, points2: numpy.ndarray[numpy.float64[m, 2]], matches: numpy.ndarray[numpy.uint32[m, 2]] = None, options: pycolmap.TwoViewGeometryOptions = <pycolmap.TwoViewGeometryOptions object at 0x000001CDDD695D70>) -> pycolmap.TwoViewGeometry

Invoked with: Camera(camera_id=0, model=SIMPLE_PINHOLE, width=1280, height=720, params=[910.000000, 640.000000, 360.000000] (f, cx, cy)), array([[ 103.25252533,  627.5736084 ],
       [ 635.10766602,  293.56375122],
       [1170.61694336,  609.92657471],
       [ 995.36523438,  359.21173096],
       [1038.58496094,  416.06182861],
       [1181.74780273,  393.3609314 ],
       [ 830.24676514,  512.47650146],
       [ 619.87194824,  591.88787842],
       [ 814.02850342,  355.37408447],
       [ 910.16351318,  650.93408203]]), Camera(camera_id=1, model=SIMPLE_PINHOLE, width=1280, height=720, params=[910.000000, 640.000000, 360.000000] (f, cx, cy)), array([[ 961.42596436,  386.71902466],        
       [1213.98132324,  658.73760986],
       [1173.48864746,  609.75372314],
       [ 998.50354004,  362.80661011],
       [1038.75439453,  413.33563232],
       [1181.22009277,  393.20046997],
       [ 829.9644165 ,  512.37768555],
       [ 616.26953125,  592.09075928],
       [ 812.72821045,  356.39505005],
       [ 909.18566895,  650.11999512]])

Complete Script (call of estimate_calibrated_two_view_geometry at the end of the script)

from pathlib import Path
from PIL import Image, ImageOps
import numpy as np
import cv2 as cv2
from scipy.spatial.transform import Rotation as R_
import torch
import pycolmap

def mutual_nn_matcher(descriptors1, descriptors2):
    """Match two descriptor sets by mutual nearest neighbours.

    Intended for L2-normalized descriptors stored one per row, so that the
    dot product of two rows is their cosine similarity and the Euclidean
    distance between them is ``sqrt(2 - 2 * similarity)``.

    Args:
        descriptors1: 2-D tensor with one descriptor per row.
        descriptors2: 2-D tensor with one descriptor per row and the same
            descriptor dimension as ``descriptors1``.

    Returns:
        A tuple ``(matches, distances)``. ``matches`` has shape ``(K, 2)``;
        each row holds the row index into ``descriptors1`` and the row index
        into ``descriptors2`` of a pair that are each other's nearest
        neighbour. ``distances`` has shape ``(K,)`` and holds the descriptor
        distance of each returned pair.
    """
    device = descriptors1.device

    # torch.max over a zero-sized dimension raises, so answer the "nothing to
    # match" case explicitly with correctly shaped empty results.
    if descriptors1.shape[0] == 0 or descriptors2.shape[0] == 0:
        return (
            torch.empty((0, 2), dtype=torch.long, device=device),
            torch.empty((0,), dtype=descriptors1.dtype, device=device),
        )

    sim = descriptors1 @ descriptors2.t()

    # Best partner in descriptors2 for every row of descriptors1.
    nn_sim, nn12 = torch.max(sim, dim=1)
    # Rounding can push a similarity marginally above 1, which would make the
    # radicand negative and the distance NaN; clamp it at zero first.
    nn_dist = torch.sqrt(torch.clamp(2 - 2 * nn_sim, min=0))

    # Best partner in descriptors1 for every row of descriptors2.
    nn21 = torch.max(sim, dim=0)[1]

    # Keep only the pairs that pick each other.
    ids1 = torch.arange(0, sim.shape[0], device=device)
    mask = ids1 == nn21[nn12]
    matches = torch.stack([ids1[mask], nn12[mask]])
    return matches.t(), nn_dist[mask]


# --- Feature extraction -----------------------------------------------------
# SIFT extractor configured with max_num_features = 50.
ops = pycolmap.SiftExtractionOptions()
ops.max_num_features = 50
sift = pycolmap.Sift(ops)

# Input images (paths are partially redacted in the report).
fin1 = r"X:\**\ref_image\origin_000000.jpg"
fin2 = r"X:\**\video\origin_000011.jpg"

# Image 1: load, convert to grayscale, scale to float32 in [0, 1], extract.
# The PIL grayscale image assigned to img1 two lines below is overwritten
# immediately; the array passed to the extractor is built from img11.
img1 = Image.open(fin1).convert('RGB')
img11 = np.array(ImageOps.grayscale(img1))
img1 = ImageOps.grayscale(img1)
img1 = np.array(img11).astype(np.float32) / 255.
keypoints1, descriptors1 = sift.extract(img1)
# Image 2: same preprocessing.
img2 = Image.open(fin2).convert('RGB')
img2 = ImageOps.grayscale(img2)
img2 = np.array(img2).astype(np.float32) / 255.
keypoints2, descriptors2 = sift.extract(img2)

# Debug output: keypoint array shapes and the first ten rows of each.
print(keypoints1.shape)
print(keypoints1[:10])
print(keypoints2.shape)
print(keypoints2[:10])
print(descriptors1.device)
# --- Descriptor matching ----------------------------------------------------
# Mutual nearest-neighbour matching on the descriptors, wrapped as tensors.
matches, distances = mutual_nn_matcher(torch.from_numpy(descriptors1), torch.from_numpy(descriptors2))

print(distances.shape)
print(distances[:10])
print(matches.shape)
print(matches[:10])

# Order matches by descending descriptor distance (argsort of the negated
# values).
# NOTE(review): this puts the largest distances first, i.e. presumably the
# weakest matches lead the list - confirm this ordering is intended.
sorted_indices = np.argsort(-distances)
sorted_matches = matches[sorted_indices]
sorted_distances = distances[sorted_indices]

print(sorted_distances[:10])
print(sorted_matches[:10])

# Gather the matched keypoints and keep only their first two columns
# (presumably the x, y image coordinates - confirm against pycolmap docs).
points1 = keypoints1[sorted_matches[:,0]][:,:2]
points2 = keypoints2[sorted_matches[:,1]][:,:2]

# points1 = keypoints1[:10,:2]
# points2 = keypoints2[:10,:2]

# --- Cameras ----------------------------------------------------------------
# Two SIMPLE_PINHOLE cameras with ids 0 and 1, created with the values
# 910, 1280, 720 (reported back as f=910, width=1280, height=720).
cam = pycolmap.Camera() 
cam1 = cam.create(0, pycolmap.CameraModelId.SIMPLE_PINHOLE, 910, 1280, 720)
cam2 = cam.create(1, pycolmap.CameraModelId.SIMPLE_PINHOLE, 910, 1280, 720)

# NOTE(review): `summary` is referenced without being called, so this prints
# the bound method object rather than a summary string.
print(cam1.summary)
print(cam1.principal_point_x)
print(cam1.principal_point_y)
print(cam1.focal_length_x)
print(cam1.focal_length_y)
# 3x3 intrinsics matrix built from cam1: focal lengths on the diagonal,
# principal point in the last column. Used for cv2.recoverPose below.
cam_intrinsics = np.diag([cam1.focal_length_x, cam1.focal_length_y, 1])
cam_intrinsics[0,2] = cam1.principal_point_x
cam_intrinsics[1,2] = cam1.principal_point_y
print(cam_intrinsics)
# sys.exit()
# calculating essential matrix with cameras and points seems to work
# --- Essential matrix (works) -----------------------------------------------
ops_ransac = pycolmap.RANSACOptions()
essential = pycolmap.essential_matrix_estimation(points1,
    points2,
    cam1,
    cam2,
    ops_ransac
    )
print(essential.keys())
print(essential["E"])
# Cross-check with OpenCV: decompose E into its two candidate rotations and
# the translation direction, printing each rotation also as Euler angles.
print("CV2")
R1,R2,t = cv2.decomposeEssentialMat(essential["E"])
print("decomposed Essential Matrix")
print(R1)
print(R_.from_matrix(R1).as_euler('xyz', degrees=True))
print(R2)
print(R_.from_matrix(R2).as_euler('xyz', degrees=True))
print(t)
print(essential["inliers"])
# Restrict both point sets to the RANSAC inliers reported by pycolmap.
points1_inliers = points1[essential["inliers"]]
points2_inliers = points2[essential["inliers"]]

# Recover the relative pose with OpenCV and print it next to pycolmap's
# cam2_from_cam1 result for comparison.
retval, R, t, mask = cv2.recoverPose(essential["E"], points1_inliers, points2_inliers, cam_intrinsics)
print("Result recoverPose CV2")
print(retval)
print(R)
print(t)
print("Result pycolmap")
print(essential["cam2_from_cam1"].matrix())
print(essential["cam2_from_cam1"].inverse().matrix())
print(essential["cam2_from_cam1"].summary())

# --- Failing call -----------------------------------------------------------
# Print exactly what is about to be passed: the first ten point pairs cast to
# float64, their shapes, both cameras, and the default two-view options.
print("debug prints: estimate_calibrated_two_view_geometry")
print(points1[:10].astype(np.float64))
print(points2[:10].astype(np.float64))
print()
print(points1[:10].astype(np.float64).shape)
print(points2[:10].astype(np.float64).shape)
print()
print(cam1)
print(cam2)
print()
props = pycolmap.TwoViewGeometryOptions()
print(props.summary())

# All those variants throw the same exception.
# This call raises the TypeError quoted in the report even though it is given
# two Camera objects and two (10, 2) float64 arrays.
# NOTE(review): `matches` is omitted (or passed as None) in every variant;
# whether the binding can actually accept its advertised `None` default is
# unverified - try passing an explicit (m, 2) uint32 index array.
print("estimate_calibrated_two_view_geometry")
answer = pycolmap.estimate_calibrated_two_view_geometry(cam1, points1[:10].astype(np.float64), cam2, points2[:10].astype(np.float64))
# answer = pycolmap.estimate_calibrated_two_view_geometry(cam1, points1[:10].astype(np.float64), cam2, points2[:10].astype(np.float64), None, props)
# answer = pycolmap.estimate_calibrated_two_view_geometry(cam1, points1[:10].astype(np.float64), cam2, points2[:10].astype(np.float64), None, options = props)

Complete Output

(70, 4)
[[ 1.20958347e+01  2.49640312e+01  1.12905636e+01  2.29283905e+00]
 [ 1.20958347e+01  2.49640312e+01  1.12905636e+01 -1.76369226e+00]
 [ 6.35107666e+02  2.93563751e+02  1.10032005e+01  3.07449651e+00]
 [ 6.35107666e+02  2.93563751e+02  1.10032005e+01 -1.25733212e-01]
 [ 1.12632788e+03  3.13299805e+02  1.18385363e+01  2.60598612e+00]
 [ 2.36843948e+01  3.23181702e+02  9.90066147e+00 -1.41594875e+00]
 [ 8.27308167e+02  3.24141815e+02  1.00310946e+01  1.04846478e+00]
 [ 6.04895447e+02  3.27756073e+02  1.00869875e+01  1.54187500e+00]
 [ 8.14028503e+02  3.55374084e+02  1.12396946e+01  2.85701305e-02]
 [ 5.75664001e+02  3.93533051e+02  1.12058096e+01  7.28527129e-01]]
(65, 4)
[[ 1.0320951e+03  1.2873740e+02  1.0616852e+01 -2.7713996e-01]
 [ 1.0473472e+03  2.0890219e+02  1.1400822e+01  1.1236876e+00]
 [ 1.0473472e+03  2.0890219e+02  1.1400822e+01  2.4707258e+00]
 [ 1.1936158e+03  2.5239784e+02  1.0976992e+01  9.3526775e-01]
 [ 6.3142053e+02  2.9328156e+02  1.0946433e+01  3.0389867e+00]
 [ 2.2146999e+02  3.0215933e+02  1.0667258e+01  1.6780452e+00]
 [ 8.2680542e+02  3.2253909e+02  1.0808415e+01  1.0410192e+00]
 [ 6.0235468e+02  3.2771146e+02  1.0073055e+01  1.5506287e+00]
 [ 1.1461711e+03  3.5643707e+02  1.0565990e+01  1.9988568e+00]
 [ 9.9850354e+02  3.6280661e+02  1.0206414e+01 -1.2435182e+00]]
cpu
torch.Size([52])
tensor([0.0423, 0.5597, 0.0987, 0.0226, 0.1768, 0.0705, 0.2038, 0.1330, 0.1284,
        0.0778])
torch.Size([52, 2])
tensor([[ 2,  4],
        [ 3, 30],
        [ 6,  6],
        [ 7,  7],
        [ 8, 43],
        [ 9, 11],
        [10, 12],
        [12, 14],
        [13, 15],
        [15, 18]])
tensor([0.5860, 0.5597, 0.4843, 0.3750, 0.2752, 0.2038, 0.1919, 0.1845, 0.1768,
        0.1348])
tensor([[31, 10],
        [ 3, 30],
        [30, 60],
        [47,  9],
        [50, 13],
        [10, 12],
        [18, 21],
        [62, 27],
        [ 8, 43],
        [67, 62]])
<bound method PyCapsule.summary of Camera(camera_id=0, model=SIMPLE_PINHOLE, width=1280, height=720, params=[910.000000, 640.000000, 360.000000] (f, cx, cy))>
640.0
360.0
910.0
910.0
[[910.   0. 640.]
 [  0. 910. 360.]
 [  0.   0.   1.]]
dict_keys(['E', 'cam2_from_cam1', 'num_inliers', 'inliers'])
[[-1.30618467e-04  6.90040270e-01 -4.56431589e-02]
 [-6.89801758e-01  2.47030238e-05  1.48734697e-01]
 [ 4.53045055e-02 -1.47513174e-01 -5.26641889e-06]]
CV2
decomposed Essential Matrix
[[ 9.99998438e-01  7.24951801e-05 -1.76573383e-03]
 [-7.33895479e-05  9.99999869e-01 -5.06453850e-04]
 [ 1.76569688e-03  5.06582646e-04  9.99998313e-01]]
[ 0.02902509 -0.10116703 -0.00420492]
[[-0.91223713  0.02686638  0.40878065]
 [ 0.02701979 -0.9917285   0.12547711]
 [ 0.40877053  0.12551005  0.90396564]]
[  7.90462991 -24.12762502 178.30343705]
[[0.2086198 ]
 [0.06405504]
 [0.97589689]]
[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True]
Result recoverPose CV2
1
[[ 9.99998438e-01  7.24951801e-05 -1.76573383e-03]
 [-7.33895479e-05  9.99999869e-01 -5.06453850e-04]
 [ 1.76569688e-03  5.06582646e-04  9.99998313e-01]]
[[0.2086198 ]
 [0.06405504]
 [0.97589689]]
Result pycolmap
[[ 9.99998438e-01  7.24951801e-05 -1.76573383e-03 -2.08619803e-01]
 [-7.33895479e-05  9.99999869e-01 -5.06453850e-04 -6.40550357e-02]
 [ 1.76569688e-03  5.06582646e-04  9.99998313e-01 -9.75896885e-01]]
[[ 9.99998438e-01 -7.33895479e-05  1.76569688e-03  2.10337914e-01]
 [ 7.24951801e-05  9.99999869e-01  5.06582646e-04  6.45645236e-02]
 [-1.76573383e-03 -5.06453850e-04  9.99998313e-01  9.75494431e-01]]
Rigid3d:
    rotation: Rotation3d:
        quat = [ 2.53259231e-04 -8.82858051e-04 -3.64711974e-05  9.99999578e-01]
    translation = [-0.2086198  -0.06405504 -0.97589689]
debug prints: estimate_calibrated_two_view_geometry
[[ 103.25252533  627.5736084 ]
 [ 635.10766602  293.56375122]
 [1170.61694336  609.92657471]
 [ 995.36523438  359.21173096]
 [1038.58496094  416.06182861]
 [1181.74780273  393.3609314 ]
 [ 830.24676514  512.47650146]
 [ 619.87194824  591.88787842]
 [ 814.02850342  355.37408447]
 [ 910.16351318  650.93408203]]
[[ 961.42596436  386.71902466]
 [1213.98132324  658.73760986]
 [1173.48864746  609.75372314]
 [ 998.50354004  362.80661011]
 [1038.75439453  413.33563232]
 [1181.22009277  393.20046997]
 [ 829.9644165   512.37768555]
 [ 616.26953125  592.09075928]
 [ 812.72821045  356.39505005]
 [ 909.18566895  650.11999512]]

(10, 2)
(10, 2)

Camera(camera_id=0, model=SIMPLE_PINHOLE, width=1280, height=720, params=[910.000000, 640.000000, 360.000000] (f, cx, cy))
Camera(camera_id=1, model=SIMPLE_PINHOLE, width=1280, height=720, params=[910.000000, 640.000000, 360.000000] (f, cx, cy))

TwoViewGeometryOptions:
    min_num_inliers = 15
    min_E_F_inlier_ratio = 0.95
    max_H_inlier_ratio = 0.8
    watermark_min_inlier_ratio = 0.7
    watermark_border_size = 0.1
    detect_watermark = True
    multiple_ignore_watermark = True
    force_H_use = False
    compute_relative_pose = False
    multiple_models = False
    ransac: RANSACOptions:
        max_error = 4.0
        min_inlier_ratio = 0.25
        confidence = 0.999
        dyn_num_trials_multiplier = 3.0
        min_num_trials = 100
        max_num_trials = 10000
estimate_calibrated_two_view_geometry
Traceback (most recent call last):
  File "d:\dev\research\sam2\test_colmap_2_issue.py", line 135, in <module>
    answer = pycolmap.estimate_calibrated_two_view_geometry(cam1, points1[:10].astype(np.float64), cam2, points2[:10].astype(np.float64))
TypeError: estimate_calibrated_two_view_geometry(): incompatible function arguments. The following argument types are supported:
    1. (camera1: pycolmap.Camera, points1: numpy.ndarray[numpy.float64[m, 2]], camera2: pycolmap.Camera, points2: numpy.ndarray[numpy.float64[m, 2]], matches: numpy.ndarray[numpy.uint32[m, 2]] = None, options: pycolmap.TwoViewGeometryOptions = <pycolmap.TwoViewGeometryOptions object at 0x000001CDDD695D70>) -> pycolmap.TwoViewGeometry

Invoked with: Camera(camera_id=0, model=SIMPLE_PINHOLE, width=1280, height=720, params=[910.000000, 640.000000, 360.000000] (f, cx, cy)), array([[ 103.25252533,  627.5736084 ],
       [ 635.10766602,  293.56375122],
       [1170.61694336,  609.92657471],
       [ 995.36523438,  359.21173096],
       [1038.58496094,  416.06182861],
       [1181.74780273,  393.3609314 ],
       [ 830.24676514,  512.47650146],
       [ 619.87194824,  591.88787842],
       [ 814.02850342,  355.37408447],
       [ 910.16351318,  650.93408203]]), Camera(camera_id=1, model=SIMPLE_PINHOLE, width=1280, height=720, params=[910.000000, 640.000000, 360.000000] (f, cx, cy)), array([[ 961.42596436,  386.71902466],        
       [1213.98132324,  658.73760986],
       [1173.48864746,  609.75372314],
       [ 998.50354004,  362.80661011],
       [1038.75439453,  413.33563232],
       [1181.22009277,  393.20046997],
       [ 829.9644165 ,  512.37768555],
       [ 616.26953125,  592.09075928],
       [ 812.72821045,  356.39505005],
       [ 909.18566895,  650.11999512]])

Different Exception utilizing matches argument

# When the matches argument is supplied, a different error occurs. See below.
print("estimate_calibrated_two_view_geometry with matches")
# Match only the first 50 descriptors of each image, so the resulting indices
# refer to rows of keypoints1[:50] / keypoints2[:50] passed below.
matches2, _ = mutual_nn_matcher(torch.from_numpy(descriptors1[:50]), torch.from_numpy(descriptors2[:50]))
print(matches2.to('cpu').numpy().shape)
# NOTE(review): the binding's signature lists `matches` as
# numpy.uint32[m, 2]; the array produced from the torch index tensor here is
# presumably int64 - confirm whether an explicit .astype(np.uint32) is needed.
answer = pycolmap.estimate_calibrated_two_view_geometry(cam1, keypoints1[:50,:2].astype(np.float64), cam2,  keypoints2[:50,:2].astype(np.float64), matches2.to('cpu').numpy())

# Calling estimate_calibrated_two_view_geometry with matches throws a different error.
# estimate_calibrated_two_view_geometry with matches
# (34, 2)
# *** Aborted at 1731322363 (unix time) try "date -d @1731322363" if you are using GNU date ***
#     @     0x7fffed8d4172 log2f
#     @     0x7ff71e51207c OPENSSL_Applink
#     @     0x7fffba9fe5cf __C_specific_handler
#     @     0x7ffff053517f __chkstk
#     @     0x7ffff04ae856 RtlFindCharInUnicodeString
#     @     0x7ffff053416e KiUserExceptionDispatcher
@huangk77
Copy link

I think you are not passing the match indices.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants