Geometry

Module including geometric routines.

This module contains the following functions:

  • compute_plane_coords(K, P, depth_hypos, H, W) - Computes the 3D world-space coordinates of the four image-plane corners back-projected at each depth hypothesis (batched PyTorch version).
  • essential_from_features(src_image_file, tgt_image_file, K) - Computes the essential matrix between two images using image features.
  • fundamental_from_KP(K, P_src, P_tgt) - Computes the fundamental matrix between two images using camera parameters.
  • fundamental_from_features(src_image_file, tgt_image_file) - Computes the fundamental matrix between two images using image features.
  • geometric_consistency_error(src_depth, src_cam, tgt_depth, tgt_cam) - Computes the geometric consistency error between a source and target depth map.
  • geometric_consistency_mask(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P, pixel_th) - Computes the geometric consistency mask between a source and target depth map.
  • homography(src_image_file, tgt_image_file) - Computes a homography transformation between two images using image features.
  • match_features(src_image, tgt_image, max_features) - Computes matching ORB features between a pair of images.
  • point_cloud_from_depth(depth, cam, color) - Creates a point cloud from a single depth map.
  • points_from_depth(depth, cam) - Creates a point array from a single depth map.
  • project_depth_map(depth, cam, mask) - Projects a depth map into a list of 3D points.
  • project_renderer(renderer, K, P, width, height) - Projects the scene in an Open3D Offscreen Renderer to the 2D image plane.
  • render_custom_values(points, values, image_shape, cam) - Renders a point cloud into a 2D camera plane using custom values for each pixel.
  • render_point_cloud(cloud, cam, width, height) - Renders a point cloud into a 2D image plane.
  • reproject(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P) - Computes the re-projection depth values and pixel indices between two depth maps.
  • visibility_mask(src_depth, src_cam, depth_files, cam_files, src_ind=-1, pixel_th=0.1) - Computes a visibility mask between a provided source depth map and list of target depth maps.
  • warp_to_tgt(tgt_depth, tgt_conf, ref_cam, tgt_cam, depth_planes, depth_vol) - Performs a homography warping of a target view's depth and confidence maps into the reference view.

compute_plane_coords(K, P, depth_hypos, H, W)

Computes the 3D world-space coordinates of the four image-plane corners back-projected at each depth hypothesis (batched PyTorch version).

Source code in cvt/geometry.py, lines 291-312
def compute_plane_coords(K, P, depth_hypos, H, W):
    """Batched PyTorch version
    """
    batch_size,_,_ = K.shape
    num_planes = depth_hypos.shape[0]

    xyz = torch.movedim(torch.tensor([[0,0,1], [W-1,0,1], [0,H-1,1], [W-1,H-1,1]], dtype=torch.float32), 0, 1).to(P)
    xyz = xyz.reshape(1,3,4).repeat(batch_size, 1, 1)
    if K.shape[1]==3:
        K_44 = torch.zeros((batch_size, 4, 4)).to(P)
        K_44[:,:3,:3] = K[:,:3,:3]
        K_44[:,3,3] = 1
        K = K_44
    proj = K @ P
    inv_proj = torch.linalg.inv(proj)

    planes = torch.zeros(num_planes, 3, 4).to(inv_proj)
    for p in range(num_planes):
        planes[p] = (inv_proj[0,:3,:3] @ xyz) * depth_hypos[p]
        planes[p] += inv_proj[0,:3,3:4]

    return planes
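
Example — a minimal usage sketch with synthetic tensors (the intrinsics, pose, and depth range below are placeholders, and the import path is assumed from the source location above): the result holds the world-space coordinates of the four image corners at each depth hypothesis.

from cvt.geometry import compute_plane_coords  # import path assumed
import torch

K = torch.eye(3).unsqueeze(0)             # [1,3,3] intrinsics
K[0, 0, 0] = 320.0                        # fx
K[0, 1, 1] = 320.0                        # fy
K[0, 0, 2] = 320.0                        # cx
K[0, 1, 2] = 240.0                        # cy
P = torch.eye(4).unsqueeze(0)             # [1,4,4] extrinsics (camera at the world origin)
depth_hypos = torch.linspace(1.0, 10.0, steps=8)   # 8 fronto-parallel depth hypotheses

planes = compute_plane_coords(K, P, depth_hypos, H=480, W=640)
print(planes.shape)   # torch.Size([8, 3, 4]) -> xyz of the 4 image corners per plane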

essential_from_features(src_image_file, tgt_image_file, K)

Computes the essential matrix between two images using image features.

Parameters:

  • src_image_file (str): Input file for the source image. [required]
  • tgt_image_file (str): Input file for the target image. [required]
  • K (np.ndarray): Intrinsics matrix of the two cameras (assumed to be constant between views). [required]

Returns:

  • np.ndarray: The essential matrix between the two image views.

Source code in cvt/geometry.py, lines 337-357
def essential_from_features(src_image_file: str, tgt_image_file: str, K: np.ndarray) -> np.ndarray:
    """Computes the essential matrix between two images using image features.

    Parameters:
        src_image_file: Input file for the source image.
        tgt_image_file: Input file for the target image.
        K: Intrinsics matrix of the two cameras (assumed to be constant between views).

    Returns:
        The essential matrix between the two image views.
    """
    src_image = cv2.imread(src_image_file)
    tgt_image = cv2.imread(tgt_image_file)

    # compute matching features
    (src_points, tgt_points) = match_features(src_image, tgt_image)

    # compute essential matrix
    E, mask = cv2.findEssentialMat(src_points, tgt_points, K, method=cv2.RANSAC)

    return E
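
Example — a minimal usage sketch; the image paths and intrinsics are placeholders, and the import path is assumed. The returned essential matrix can be decomposed into a relative pose with OpenCV.

from cvt.geometry import essential_from_features  # import path assumed
import cv2
import numpy as np

K = np.array([[718.8,   0.0, 607.2],
              [  0.0, 718.8, 185.2],
              [  0.0,   0.0,   1.0]])

E = essential_from_features("view_000.png", "view_001.png", K)

# decompose into the two candidate rotations and the (up-to-scale) translation
R1, R2, t = cv2.decomposeEssentialMat(E)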

fundamental_from_KP(K, P_src, P_tgt)

Computes the fundamental matrix between two images using camera parameters.

Parameters:

  • K (np.ndarray): Intrinsics matrix of the two cameras (assumed to be constant between views). [required]
  • P_src (np.ndarray): Extrinsics matrix for the source view. [required]
  • P_tgt (np.ndarray): Extrinsics matrix for the target view. [required]

Returns:

  • np.ndarray: The fundamental matrix between the two cameras.

Source code in cvt/geometry.py, lines 359-387
def fundamental_from_KP(K: np.ndarray, P_src: np.ndarray, P_tgt: np.ndarray) -> np.ndarray:
    """Computes the fundamental matrix between two images using camera parameters.

    Parameters:
        K: Intrinsics matrix of the two cameras (assumed to be constant between views).
        P_src: Extrinsics matrix for the source view.
        P_tgt: Extrinsics matrix for the target view.

    Returns:
        The fundamental matrix between the two cameras.
    """
    R1 = P_src[0:3,0:3]
    t1 = P_src[0:3,3]
    R2 = P_tgt[0:3,0:3]
    t2 = P_tgt[0:3,3]

    t1aug = np.array([t1[0], t1[1], t1[2], 1])
    epi2 = np.matmul(P_tgt,t1aug)
    epi2 = np.matmul(K,epi2[0:3])

    R = np.matmul(R2,np.transpose(R1))
    t= t2- np.matmul(R,t1)
    K1inv = np.linalg.inv(K)
    K2invT = np.transpose(K1inv)
    tx = np.array([[0, -t[2], t[1]], [t[2], 0, -t[0]], [-t[1], t[0], 0]])
    F = np.matmul(K2invT,np.matmul(tx,np.matmul(R,K1inv)))
    F = F/np.amax(F)

    return F
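
Example — a minimal sketch with synthetic cameras (identity source pose, target camera shifted along x); all values and the import path are assumptions. With the convention used by this function, F @ x_src gives the epipolar line of a source pixel in the target image.

from cvt.geometry import fundamental_from_KP  # import path assumed
import numpy as np

K = np.array([[500.0,   0.0, 320.0],
              [  0.0, 500.0, 240.0],
              [  0.0,   0.0,   1.0]])
P_src = np.eye(4)
P_tgt = np.eye(4)
P_tgt[0, 3] = 0.2                        # translate the target camera along x

F = fundamental_from_KP(K, P_src, P_tgt)

x_src = np.array([320.0, 240.0, 1.0])    # homogeneous pixel in the source image
line_tgt = F @ x_src                     # epipolar line coefficients (a, b, c) in the target image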

fundamental_from_features(src_image_file, tgt_image_file)

Computes the fundamental matrix between two images using image features.

Parameters:

  • src_image_file (str): Input file for the source image. [required]
  • tgt_image_file (str): Input file for the target image. [required]

Returns:

  • np.ndarray: The fundamental matrix between the two image views.

Source code in cvt/geometry.py, lines 389-408
def fundamental_from_features(src_image_file: str, tgt_image_file: str) -> np.ndarray:
    """Computes the fundamental matrix between two images using image features.

    Parameters:
        src_image_file: Input file for the source image.
        tgt_image_file: Input file for the target image.

    Returns:
        The fundamental matrix between the two image views.
    """
    src_image = cv2.imread(src_image_file)
    tgt_image = cv2.imread(tgt_image_file)

    # compute matching features
    (src_points, tgt_points) = match_features(src_image, tgt_image)

    # Compute fundamental matrix
    F, mask = cv2.findFundamentalMat(src_points,tgt_points,cv2.FM_8POINT)

    return F

geometric_consistency_error(src_depth, src_cam, tgt_depth, tgt_cam)

Computes the geometric consistency error between a source and target depth map.

Parameters:

  • src_depth (np.ndarray): Depth map for the source view. [required]
  • src_cam (np.ndarray): Camera parameters for the source depth map viewpoint. [required]
  • tgt_depth (np.ndarray): Depth map for the target view. [required]
  • tgt_cam (np.ndarray): Camera parameters for the target depth map viewpoint. [required]

Returns:

  • dist (np.ndarray): The per-pixel re-projection distance between the source pixels and their re-projected coordinates.
  • projection_map (np.ndarray): The projection map produced during re-projection.

Source code in cvt/geometry.py, lines 436-455
def geometric_consistency_error(src_depth: np.ndarray, src_cam: np.ndarray, tgt_depth: np.ndarray, tgt_cam: np.ndarray) -> np.ndarray:
    """Computes the geometric consistency error between a source and target depth map.

    Parameters:
        src_depth: Depth map for the source view.
        src_cam: Camera parameters for the source depth map viewpoint.
        tgt_depth: Depth map for the target view.
        tgt_cam: Camera parameters for the target depth map viewpoint.

    Returns:
        dist: The per-pixel re-projection distance between the source pixels and their re-projected coordinates.
        projection_map: The projection map produced during re-projection.
    """
    height, width = src_depth.shape
    x_src, y_src = np.meshgrid(np.arange(0, width), np.arange(0, height))

    depth_reprojected, coords_reprojected, coords_tgt, projection_map = reproject(src_depth, src_cam, tgt_depth, tgt_cam)

    dist = np.sqrt((coords_reprojected[:,:,0] - x_src) ** 2 + (coords_reprojected[:,:,1] - y_src) ** 2)

    return dist, projection_map

geometric_consistency_mask(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P, pixel_th)

Computes the geometric consistency mask between a source and target depth map.

Parameters:

  • src_depth: Depth map for the source view. [required]
  • src_K: Intrinsic camera parameters for the source depth map viewpoint. [required]
  • src_P: Extrinsic camera parameters for the source depth map viewpoint. [required]
  • tgt_depth: Target depth map used for re-projection. [required]
  • tgt_K: Intrinsic camera parameters for the target depth map viewpoint. [required]
  • tgt_P: Extrinsic camera parameters for the target depth map viewpoint. [required]
  • pixel_th: Pixel re-projection threshold to determine matching depth estimates. [required]

Returns:

  • The binary consistency mask encoding depth consensus between source and target depth maps.

Source code in cvt/geometry.py, lines 410-434
def geometric_consistency_mask(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P, pixel_th):
    """Computes the geometric consistency mask between a source and target depth map.

    Parameters:
        src_depth: Depth map for the source view.
        src_K: Intrinsic camera parameters for the source depth map viewpoint.
        src_P: Extrinsic camera parameters for the source depth map viewpoint.
        tgt_depth: Target depth map used for re-projection.
        tgt_K: Intrinsic camera parameters for the target depth map viewpoint.
        tgt_P: Extrinsic camera parameters for the target depth map viewpoint.
        pixel_th: Pixel re-projection threshold to determine matching depth estimates.

    Returns:
        The binary consistency mask encoding depth consensus between source and target depth maps.
    """
    batch_size, c, height, width = src_depth.shape
    depth_reprojected, coords_reprojected, coords_tgt = reproject(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P)

    x_src, y_src = torch.meshgrid(torch.arange(0, width), torch.arange(0, height), indexing="xy")
    x_src = x_src.unsqueeze(0).repeat(batch_size, 1, 1).to(src_depth)
    y_src = y_src.unsqueeze(0).repeat(batch_size, 1, 1).to(src_depth)
    dist = torch.sqrt((coords_reprojected[:,:,:,0] - x_src) ** 2 + (coords_reprojected[:,:,:,1] - y_src) ** 2)

    mask = torch.where(dist < pixel_th, 1, 0)
    return mask
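
Example — a minimal sketch with synthetic tensors (shapes, camera values, and the import path are assumptions): two identical views of the same depth map are fully consistent, so the mask is all ones.

from cvt.geometry import geometric_consistency_mask  # import path assumed
import torch

B, H, W = 1, 64, 80
depth = torch.full((B, 1, H, W), 5.0)
K = torch.tensor([[[100.0,   0.0, 40.0],
                   [  0.0, 100.0, 32.0],
                   [  0.0,   0.0,  1.0]]])   # [B,3,3] intrinsics
P = torch.eye(4).unsqueeze(0)                # [B,4,4] extrinsics

mask = geometric_consistency_mask(depth, K, P, depth.clone(), K, P, pixel_th=1.0)
print(mask.shape, mask.float().mean().item())   # torch.Size([1, 64, 80]) 1.0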

homography(src_image_file, tgt_image_file)

Computes a homography transformation between two images using image features.

Parameters:

  • src_image_file (str): Input file for the source image. [required]
  • tgt_image_file (str): Input file for the target image. [required]

Returns:

  • np.ndarray: The homography matrix to warp the target image to the source image.

Source code in cvt/geometry.py, lines 457-477
def homography(src_image_file: str, tgt_image_file: str) -> np.ndarray:
    """Computes a homography transformation between two images using image features.

    Parameters:
        src_image_file: Input file for the source image.
        tgt_image_file: Input file for the target image.

    Returns:
        The homography matrix to warp the target image to the source image.
    """
    src_image = cv2.imread(src_image_file)
    tgt_image = cv2.imread(tgt_image_file)

    (height, width, _) = src_image.shape

    (src_points, tgt_points) = match_features(src_image, tgt_image)

    # Compute homography
    H, mask = cv2.findHomography(tgt_points, src_points, method=cv2.RANSAC)

    return H
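
Example — a minimal usage sketch; the image paths and import path are placeholders. Since the returned H maps target pixels to source pixels, it can be used directly to warp the target image into the source frame.

from cvt.geometry import homography  # import path assumed
import cv2

src = cv2.imread("src.png")
tgt = cv2.imread("tgt.png")

H = homography("src.png", "tgt.png")
height, width = src.shape[:2]
warped_tgt = cv2.warpPerspective(tgt, H, (width, height))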

match_features(src_image, tgt_image, max_features=500)

Computes matching ORB features between a pair of images.

Parameters:

  • src_image (np.ndarray): The source image to compute and match features. [required]
  • tgt_image (np.ndarray): The target image to compute and match features. [required]
  • max_features (int): The maximum number of features to retain. [default: 500]

Returns:

  • src_points (np.ndarray): The set of matched point coordinates for the source image.
  • tgt_points (np.ndarray): The set of matched point coordinates for the target image.

Source code in cvt/geometry.py, lines 479-513
def match_features(src_image: np.ndarray, tgt_image: np.ndarray, max_features: int = 500) -> Tuple[np.ndarray, np.ndarray]:
    """Computer matching ORB features between a pair of images.

    Args:
        src_image: The source image to compute and match features.
        tgt_image: The target image to compute and match features.
        max_features: The maximum number of features to retain.

    Returns:
        src_points: The set of matched point coordinates for the source image.
        tgt_points: The set of matched point coordinates for the target image.
    """
    src_image = cv2.cvtColor(src_image,cv2.COLOR_BGR2GRAY)
    tgt_image = cv2.cvtColor(tgt_image, cv2.COLOR_BGR2GRAY)

    orb = cv2.ORB_create(max_features)

    src_keypoints, src_descriptors = orb.detectAndCompute(src_image,None)
    tgt_keypoints, tgt_descriptors = orb.detectAndCompute(tgt_image,None)

    # ORB descriptors are binary, so match them with the Hamming norm
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = list(matcher.match(src_descriptors, tgt_descriptors))
    matches.sort(key=lambda x: x.distance)

    # keep only the eight best matches (sufficient for the 8-point algorithm)
    src_points = []
    tgt_points = []
    for i in range(8):
        m = matches[i]

        src_points.append(src_keypoints[m.queryIdx].pt)
        tgt_points.append(tgt_keypoints[m.trainIdx].pt)
    src_points  = np.asarray(src_points)
    tgt_points = np.asarray(tgt_points)

    return (src_points, tgt_points)
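
Example — a minimal usage sketch; the image paths and import path are placeholders. The returned arrays hold matched pixel coordinates (one row per match) and can be fed directly to OpenCV's geometry estimators.

from cvt.geometry import match_features  # import path assumed
import cv2
import numpy as np

src = cv2.imread("src.png")
tgt = cv2.imread("tgt.png")

src_pts, tgt_pts = match_features(src, tgt, max_features=1000)
print(src_pts.shape, tgt_pts.shape)   # (8, 2) (8, 2) -- only the 8 best matches are kept

F, inlier_mask = cv2.findFundamentalMat(src_pts, tgt_pts, cv2.FM_8POINT)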

point_cloud_from_depth(depth, cam, color)

Creates a point cloud from a single depth map.

Parameters:

  • depth (np.ndarray): Depth map to project to 3D. [required]
  • cam (np.ndarray): Camera parameters for the given depth map viewpoint. [required]
  • color (np.ndarray): Color [R,G,B] used for all points in the generated point cloud. [required]

Returns:

  • o3d.geometry.PointCloud: The colored point cloud generated from the depth map.

Source code in cvt/geometry.py, lines 515-544
def point_cloud_from_depth(depth: np.ndarray, cam: np.ndarray, color: np.ndarray) -> o3d.geometry.PointCloud:
    """Creates a point cloud from a single depth map.

    Parameters:
        depth: Depth map to project to 3D.
        cam: Camera parameters for the given depth map viewpoint.
        color: Color [R,G,B] used for all points in the generated point cloud.

    Returns:
        The colored point cloud generated from the depth map.
    """
    cloud = o3d.geometry.PointCloud()

    # extract camera params
    height, width = depth.shape
    fx = cam[1,0,0]
    fy = cam[1,1,1]
    cx = cam[1,0,2]
    cy = cam[1,1,2]
    intrins = o3d.camera.PinholeCameraIntrinsic(width, height, fx, fy, cx, cy)
    extrins = cam[0]

    # convert depth to o3d.geometry.Image
    depth_map = o3d.geometry.Image(depth)

    # project depth map to 3D
    cloud = cloud.create_from_depth_image(depth_map, intrins, extrins, depth_scale=1.0, depth_trunc=1000)

    # color point cloud
    colors = o3d.utility.Vector3dVector(np.full((len(cloud.points), 3), color))
    cloud.colors = colors

    return cloud
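
Example — a minimal sketch with a synthetic depth map and a stacked [extrinsics, intrinsics] camera array matching the cam[0]/cam[1] layout used above; the output file name and import path are placeholders.

from cvt.geometry import point_cloud_from_depth  # import path assumed
import numpy as np
import open3d as o3d

depth = np.full((240, 320), 2.0, dtype=np.float32)
cam = np.zeros((2, 4, 4))
cam[0] = np.eye(4)                          # extrinsics
cam[1, :3, :3] = [[250.0,   0.0, 160.0],
                  [  0.0, 250.0, 120.0],
                  [  0.0,   0.0,   1.0]]    # intrinsics

cloud = point_cloud_from_depth(depth, cam, color=np.array([0.2, 0.6, 0.9]))
o3d.io.write_point_cloud("cloud.ply", cloud)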

points_from_depth(depth, cam)

Creates a point array from a single depth map.

Parameters:

  • depth (np.ndarray): Depth map to project to 3D. [required]
  • cam (np.ndarray): Camera parameters for the given depth map viewpoint. [required]

Returns:

  • np.ndarray: An array of 3D points corresponding to the input depth map.

Source code in cvt/geometry.py, lines 763-784
def points_from_depth(depth: np.ndarray, cam: np.ndarray) -> np.ndarray:
    """Creates a point array from a single depth map.

    Parameters:
        depth: Depth map to project to 3D.
        cam: Camera parameters for the given depth map viewpoint.

    Returns:
        An array of 3D points corresponding to the input depth map.
    """
    # project depth map to point cloud
    height, width = depth.shape
    x = np.linspace(0,width-1,num=width)
    y = np.linspace(0,height-1,num=height)
    x,y = np.meshgrid(x,y, indexing="xy")
    x = x.flatten()
    y = y.flatten()
    depth = depth.flatten()
    xyz_cam = np.matmul(np.linalg.inv(cam[1,:3,:3]), np.vstack((x, y, np.ones_like(x))) * depth)
    xyz_world = np.matmul(np.linalg.inv(cam[0,:4,:4]), np.vstack((xyz_cam, np.ones_like(x))))[:3]
    points = xyz_world.transpose((1, 0))
    return points
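
Example — a minimal sketch with a synthetic depth map; cam stacks the extrinsics (cam[0]) and intrinsics (cam[1]) in the layout expected above, and the import path is assumed. Every pixel is back-projected, so the result has one 3D point per pixel.

from cvt.geometry import points_from_depth  # import path assumed
import numpy as np

depth = np.full((120, 160), 1.5, dtype=np.float32)
cam = np.zeros((2, 4, 4))
cam[0] = np.eye(4)                          # extrinsics
cam[1, :3, :3] = [[100.0,   0.0, 80.0],
                  [  0.0, 100.0, 60.0],
                  [  0.0,   0.0,  1.0]]     # intrinsics

points = points_from_depth(depth, cam)
print(points.shape)   # (19200, 3) -> one 3D point per pixel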

project_depth_map(depth, cam, mask=None)

Projects a depth map into a list of 3D points

Parameters:

  • depth (torch.Tensor): Input depth map to project. [required]
  • cam (torch.Tensor): Camera parameters for the input depth map. [required]
  • mask (torch.Tensor, optional): Optional mask selecting the subset of projected points to keep. [default: None]

Returns:

  • torch.Tensor: A float Tensor of 3D points corresponding to the projected depth values.

Source code in cvt/geometry.py, lines 787-839
def project_depth_map(depth: torch.Tensor, cam: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
    """Projects a depth map into a list of 3D points

    Parameters:
        depth: Input depth map to project.
        cam: Camera parameters for the input depth map.
        mask: Optional mask selecting the subset of projected points to keep.

    Returns:
        A float Tensor of 3D points corresponding to the projected depth values.
    """
    if (depth.shape[1] == 1):
        depth = depth.squeeze(1)

    batch_size, height, width = depth.shape
    cam_shape = cam.shape

    # get camera extrinsics and intrinsics
    P = cam[:,0,:,:]
    K = cam[:,1,:,:]
    K[:,3,:] = torch.tensor([0,0,0,1])

    # construct back-projection from inverse matrices
    # separate into rotation and translation components
    bwd_projection = torch.matmul(torch.inverse(P), torch.inverse(K)).to(torch.float32)
    bwd_rotation = bwd_projection[:,:3,:3]
    bwd_translation = bwd_projection[:,:3,3:4]

    # build 2D homogeneous coordinates tensor: [B, 3, H*W]
    with torch.no_grad():
        row_span = torch.arange(0, height, dtype=torch.float32, device=depth.device)
        col_span = torch.arange(0, width, dtype=torch.float32, device=depth.device)
        r,c = torch.meshgrid(row_span, col_span, indexing="ij")
        r,c = r.contiguous(), c.contiguous()
        r,c = r.reshape(height*width), c.reshape(height*width)
        coords = torch.stack((c,r,torch.ones_like(c)))
        coords = torch.unsqueeze(coords, dim=0).repeat(batch_size, 1, 1)

    # compute 3D coordinates using the depth map: [B, H*W, 3]
    world_coords = torch.matmul(bwd_rotation, coords)
    depth = depth.reshape(batch_size, 1, -1)
    world_coords = world_coords * depth
    world_coords = world_coords + bwd_translation

    #TODO: make sure index select is differentiable
    #       (there is a backward function but need to find the code..)
    if mask is not None:
        # keep only the masked points (the mask is assumed to index the flattened H*W pixel grid)
        non_zero_inds = torch.nonzero(mask.reshape(-1), as_tuple=False).squeeze(1)
        world_coords = torch.index_select(world_coords, dim=2, index=non_zero_inds)
        world_coords = torch.movedim(world_coords, 1, 2)

    # reshape 3D coordinates back into 2D map: [B, H, W, 3]
    #   coords_map = world_coords.reshape(batch_size, height, width, 3)

    return world_coords
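
Example — a minimal sketch with synthetic tensors; cam stacks the extrinsics (cam[:,0]) and a 4x4 intrinsics matrix (cam[:,1]) to match the indexing used above, and all values and the import path are placeholders.

from cvt.geometry import project_depth_map  # import path assumed
import torch

B, H, W = 1, 60, 80
depth = torch.full((B, 1, H, W), 3.0)
cam = torch.zeros((B, 2, 4, 4))
cam[:, 0] = torch.eye(4)        # extrinsics
cam[:, 1] = torch.eye(4)        # 4x4 intrinsics
cam[:, 1, 0, 0] = 100.0         # fx
cam[:, 1, 1, 1] = 100.0         # fy
cam[:, 1, 0, 2] = 40.0          # cx
cam[:, 1, 1, 2] = 30.0          # cy

points = project_depth_map(depth, cam)
print(points.shape)   # torch.Size([1, 3, 4800]) -> one 3D point per pixel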

project_renderer(renderer, K, P, width, height)

Projects the scene in an Open3D Offscreen Renderer to the 2D image plane.

Parameters:

  • renderer (o3d.visualization.rendering.OffscreenRenderer): Geometric scene to be projected. [required]
  • K (np.ndarray): Camera intrinsic parameters. [required]
  • P (np.ndarray): Camera extrinsic parameters. [required]
  • width (float): Desired image width. [required]
  • height (float): Desired image height. [required]

Returns:

  • np.ndarray: The rendered image for the scene at the specified camera viewpoint.

Source code in cvt/geometry.py, lines 841-862
def project_renderer(renderer: o3d.visualization.rendering.OffscreenRenderer, K: np.ndarray, P: np.ndarray, width: float, height: float) -> np.ndarray:
    """Projects the scene in an Open3D Offscreen Renderer to the 2D image plane.

    Parameters:
        renderer: Geometric scene to be projected.
        K: Camera intrinsic parameters.
        P: Camera extrinsic parameters.
        width: Desired image width.
        height: Desired image height.

    Returns:
        The rendered image for the scene at the specified camera viewpoint.
    """
    # set up the renderer
    intrins = o3d.camera.PinholeCameraIntrinsic(width, height, K[0,0], K[1,1], K[0,2], K[1,2])
    renderer.setup_camera(intrins, P)

    # render image
    image = np.asarray(renderer.render_to_image())
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    return image
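
Example — a minimal sketch: build an offscreen scene with a placeholder mesh and render it from an explicit camera; the geometry, intrinsics, pose, and import path are all assumptions.

from cvt.geometry import project_renderer  # import path assumed
import numpy as np
import open3d as o3d

renderer = o3d.visualization.rendering.OffscreenRenderer(640, 480)
mat = o3d.visualization.rendering.MaterialRecord()
mat.shader = "defaultUnlit"
renderer.scene.add_geometry("sphere", o3d.geometry.TriangleMesh.create_sphere(radius=0.5), mat)

K = np.array([[500.0,   0.0, 320.0],
              [  0.0, 500.0, 240.0],
              [  0.0,   0.0,   1.0]])
P = np.eye(4)
P[2, 3] = 2.0   # push the scene two units in front of the camera

image = project_renderer(renderer, K, P, 640, 480)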

render_custom_values(points, values, image_shape, cam)

Renders a point cloud into a 2D camera plane using custom values for each pixel.

Parameters:

  • points (np.ndarray): List of 3D points to be rendered. [required]
  • values (np.ndarray): List of values to be written in the rendered image. [required]
  • image_shape (Tuple[int, int]): Desired shape (height, width) of the rendered image. [required]
  • cam (np.ndarray): Camera parameters for the image viewpoint. [required]

Returns:

  • np.ndarray: The rendered image for the list of points using the specified corresponding values.

Source code in cvt/geometry.py, lines 864-882
def render_custom_values(points: np.ndarray, values: np.ndarray, image_shape: Tuple[int,int], cam: np.ndarray) -> np.ndarray:
    """Renders a point cloud into a 2D camera plane using custom values for each pixel.

    Parameters:
        points: List of 3D points to be rendered.
        values: List of values to be written in the rendered image.
        image_shape: Desired shape (height,width) of the rendered image.
        cam: Camera parameters for the image viewpoint.

    Returns:
        The rendered image for the list of points using the specified corresponding values.
    """
    points = points.tolist()
    values = list(values.astype(float))
    cam = cam.flatten().tolist()

    rendered_img = rd.render(list(image_shape), points, values, cam)

    return rendered_img

render_point_cloud(cloud, cam, width, height)

Renders a point cloud into a 2D image plane.

Parameters:

  • cloud (o3d.geometry.PointCloud): Point cloud to be rendered. [required]
  • cam (np.ndarray): Camera parameters for the image plane. [required]
  • width (int): Desired width of the rendered image. [required]
  • height (int): Desired height of the rendered image. [required]

Returns:

  • np.ndarray: The rendered image for the point cloud at the specified camera viewpoint.

Source code in cvt/geometry.py, lines 920-949
def render_point_cloud(cloud: o3d.geometry.PointCloud, cam: np.ndarray, width: int, height: int) -> np.ndarray:
    """Renders a point cloud into a 2D image plane.

    Parameters:
        cloud: Point cloud to be rendered.
        cam: Camera parameters for the image plane.
        width: Desired width of the rendered image.
        height: Desired height of the rendered image.

    Returns:
        The rendered image for the point cloud at the specified camera viewpoint.
    """
    #   cmap = plt.get_cmap("hot_r")
    #   colors = cmap(dists)[:, :3]
    #   ply.colors = o3d.utility.Vector3dVector(colors)

    # set up the renderer
    render = o3d.visualization.rendering.OffscreenRenderer(width, height)
    mat = o3d.visualization.rendering.MaterialRecord()
    mat.shader = 'defaultUnlit'
    render.scene.add_geometry("cloud", cloud, mat)
    render.scene.set_background(np.asarray([0,0,0,1])) #r,g,b,a
    intrins = o3d.camera.PinholeCameraIntrinsic(width, height, cam[1,0,0], cam[1,1,1], cam[1,0,2], cam[1,1,2])
    render.setup_camera(intrins, cam[0])

    # render image
    image = np.asarray(render.render_to_image())
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    return image
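
Example — a minimal sketch chaining point_cloud_from_depth and render_point_cloud with a synthetic depth map and the stacked [extrinsics, intrinsics] camera layout; all values and the import path are placeholders.

from cvt.geometry import point_cloud_from_depth, render_point_cloud  # import path assumed
import numpy as np

depth = np.full((240, 320), 2.0, dtype=np.float32)
cam = np.zeros((2, 4, 4))
cam[0] = np.eye(4)
cam[1, :3, :3] = [[250.0, 0.0, 160.0], [0.0, 250.0, 120.0], [0.0, 0.0, 1.0]]

cloud = point_cloud_from_depth(depth, cam, color=np.array([1.0, 0.0, 0.0]))
image = render_point_cloud(cloud, cam, width=320, height=240)
print(image.shape)   # (240, 320, 3)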

reproject(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P)

Computes the re-projection depth values and pixel indices between two depth maps.

This function takes as input two depth maps: 'src_depth' and 'tgt_depth'. The source depth map is first projected into the target camera plane using the source depth values and the camera parameters for both views. Using the projected pixel coordinates in the target view, the target depths are then re-projected back into the source camera plane (again with the camera parameters for both views). The information produced by this process is often used to compute re-projection errors between two depth maps, or for similar operations.

Parameters:

  • src_depth: Source depth map to be projected. [required]
  • src_K: Intrinsic camera parameters for the source depth map viewpoint. [required]
  • src_P: Extrinsic camera parameters for the source depth map viewpoint. [required]
  • tgt_depth: Target depth map used for re-projection. [required]
  • tgt_K: Intrinsic camera parameters for the target depth map viewpoint. [required]
  • tgt_P: Extrinsic camera parameters for the target depth map viewpoint. [required]

Returns:

  • depth_reprojected: The re-projected depth values for the source depth map.
  • coords_reprojected: The re-projection coordinates for the source view.
  • coords_tgt: The projected coordinates for the target view.

Source code in cvt/geometry.py, lines 951-1025
def reproject(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P):
    """Computes the re-projection depth values and pixel indices between two depth maps.

    This function takes as input two depth maps: 'src_depth' and 'tgt_depth'. The source
    depth map is first projected into the target camera plane using the source depth
    values and the camera parameters for both views. Using the projected pixel
    coordinates in the target view, the target depths are then re-projected back into
    the source camera plane (again with the camera parameters for both views). The
    information produced by this process is often used to compute errors in
    re-projection between two depth maps, or similar operations.

    Parameters:
        src_depth: Source depth map to be projected.
        src_K: Intrinsic camera parameters for the source depth map viewpoint.
        src_P: Extrinsic camera parameters for the source depth map viewpoint.
        tgt_depth: Target depth map used for re-projection.
        tgt_K: Intrinsic camera parameters for the target depth map viewpoint.
        tgt_P: Extrinsic camera parameters for the target depth map viewpoint.

    Returns:
        depth_reprojected: The re-projected depth values for the source depth map.
        coords_reprojected: The re-projection coordinates for the source view.
        coords_tgt: The projected coordinates for the target view.
    """
    batch_size, c, height, width = src_depth.shape

    # back-project src depths to 3D
    x_src, y_src = torch.meshgrid(torch.arange(0, width), torch.arange(0, height), indexing="xy")
    x_src = x_src.reshape(-1).unsqueeze(0).repeat(batch_size, 1).to(src_depth)
    y_src = y_src.reshape(-1).unsqueeze(0).repeat(batch_size, 1).to(src_depth)
    homog = torch.stack((x_src, y_src, torch.ones_like(x_src)), dim=1)
    xyz_src = torch.matmul(torch.linalg.inv(src_K), homog * src_depth.reshape(batch_size, 1, -1))

    # transform 3D points from src to tgt coords
    homog_3d = torch.concatenate((xyz_src, torch.ones_like(x_src).unsqueeze(1)), dim=1)
    xyz_tgt = torch.matmul(torch.matmul(tgt_P, torch.linalg.inv(src_P)), homog_3d)[:,:3]

    # project tgt 3D points into pixel coords
    K_xyz_tgt = torch.matmul(tgt_K, xyz_tgt)
    xy_tgt = K_xyz_tgt[:,:2] / K_xyz_tgt[:,2:3]
    x_tgt = xy_tgt[:,0].reshape(batch_size, height, width).to(torch.float32)
    y_tgt = xy_tgt[:,1].reshape(batch_size, height, width).to(torch.float32)
    coords_tgt = torch.stack((x_tgt, y_tgt), dim=-1) # B x H x W x 2

    # sample the depth values from the tgt map at each pixel coord
    x_normalized = ((x_tgt / (width-1)) * 2) - 1
    y_normalized = ((y_tgt / (height-1)) * 2) - 1
    grid = torch.stack((x_normalized, y_normalized), dim=-1) # B x H x W x 2
    sampled_depth_tgt = F.grid_sample(
                                    tgt_depth,
                                    grid,
                                    mode="bilinear",
                                    padding_mode="zeros",
                                    align_corners=False)

    # back-project the sampled tgt depths to 3D
    homog = torch.concatenate((xy_tgt, torch.ones_like(x_src).unsqueeze(1)), dim=1)
    xyz_tgt = torch.matmul(torch.linalg.inv(tgt_K), homog * sampled_depth_tgt.reshape(batch_size, 1, -1))

    # transform 3D points from tgt back to src coords
    homog_3d = torch.concatenate((xyz_tgt, torch.ones_like(x_src).unsqueeze(1)), dim=1)
    xyz_reprojected = torch.matmul(torch.matmul(src_P, torch.linalg.inv(tgt_P)), homog_3d)[:,:3]

    # extract reprojected depth values
    depth_reprojected = xyz_reprojected[:,2].reshape(batch_size, height, width).to(torch.float32)

    # project the re-projected 3D points into src pixel coords
    K_xyz_reprojected = torch.matmul(src_K, xyz_reprojected)
    xy_reprojected = K_xyz_reprojected[:,:2] / (K_xyz_reprojected[:,2:3] + 1e-7)
    x_reprojected = xy_reprojected[:,0].reshape(batch_size, height, width).to(torch.float32)
    y_reprojected = xy_reprojected[:,1].reshape(batch_size, height, width).to(torch.float32)

    coords_reprojected = torch.stack((x_reprojected, y_reprojected), dim=-1) # B x H x W x 2

    return depth_reprojected, coords_reprojected, coords_tgt
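
Example — a minimal sketch with two identical synthetic views (camera values and the import path are assumptions): the two returned coordinate maps coincide and the re-projected depths match the source depths in the image interior, which is the degenerate case downstream consistency checks build on.

from cvt.geometry import reproject  # import path assumed
import torch

B, H, W = 1, 48, 64
depth = torch.full((B, 1, H, W), 4.0)
K = torch.tensor([[[80.0,  0.0, 32.0],
                   [ 0.0, 80.0, 24.0],
                   [ 0.0,  0.0,  1.0]]])   # [B,3,3]
P = torch.eye(4).unsqueeze(0)              # [B,4,4]

depth_re, coords_re, coords_tgt = reproject(depth, K, P, depth.clone(), K, P)
print(depth_re.shape, coords_re.shape, coords_tgt.shape)
# torch.Size([1, 48, 64]) torch.Size([1, 48, 64, 2]) torch.Size([1, 48, 64, 2])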

visibility_mask(src_depth, src_cam, depth_files, cam_files, src_ind=-1, pixel_th=0.1)

Computes a visibility mask between a provided source depth map and list of target depth maps.

Parameters:

  • src_depth (np.ndarray): Depth map for the source view. [required]
  • src_cam (np.ndarray): Camera parameters for the source depth map viewpoint. [required]
  • depth_files (List[str]): List of target depth maps. [required]
  • cam_files (List[str]): List of corresponding target camera parameters for each target depth map viewpoint. [required]
  • src_ind (int): Index into 'depth_files' corresponding to the source depth map (if included in the list). [default: -1]
  • pixel_th (float): Pixel re-projection threshold to determine matching depth estimates. [default: 0.1]

Returns:

  • np.ndarray: The visibility mask for the source view.

Source code in cvt/geometry.py, lines 1027-1059
def visibility_mask(src_depth: np.ndarray, src_cam: np.ndarray, depth_files: List[str], cam_files: List[str], src_ind: int = -1, pixel_th: float = 0.1) -> np.ndarray:
    """Computes a visibility mask between a provided source depth map and list of target depth maps.

    Parameters:
        src_depth: Depth map for the source view.
        src_cam: Camera parameters for the source depth map viewpoint.
        depth_files: List of target depth maps.
        cam_files: List of corresponding target camera parameters for each target depth map viewpoint.
        src_ind: Index into 'depth_files' corresponding to the source depth map (if included in the list).
        pixel_th: Pixel re-projection threshold to determine matching depth estimates.

    Returns:
        The visibility mask for the source view.
    """
    height, width = src_depth.shape
    vis_map = np.not_equal(src_depth, 0.0).astype(np.double)

    for i in range(len(depth_files)):
        if (i==src_ind):
            continue

        # get files
        sdf = depth_files[i]
        scf = cam_files[i]

        # load data
        tgt_depth = read_pfm(sdf)
        tgt_cam = read_single_cam_sfm(scf,'r')

        mask = geometric_consistency_mask(src_depth, src_cam, tgt_depth, tgt_cam, pixel_th)
        vis_map += mask

    return vis_map.astype(np.float32)

warp_to_tgt(tgt_depth, tgt_conf, ref_cam, tgt_cam, depth_planes, depth_vol)

Performs a homography warping of a target view's depth and confidence maps into the reference view over a set of depth plane hypotheses.

Parameters:

  • tgt_depth (torch.Tensor): Depth map for the target view. [required]
  • tgt_conf (torch.Tensor): Confidence map for the target view. [required]
  • ref_cam (torch.Tensor): Camera parameters for the reference view. [required]
  • tgt_cam (torch.Tensor): Camera parameters for the target view. [required]
  • depth_planes (torch.Tensor): Number of depth plane hypotheses. [required]
  • depth_vol (torch.Tensor): Volume of depth hypotheses used for the warp. [required]

Returns:

  • depth_diff (torch.Tensor): Difference between the projected reference depths and the warped target depths.
  • warped_conf (torch.Tensor): Target confidence map warped into the reference view.
Source code in cvt/geometry.py, lines 1061-1145
def warp_to_tgt(tgt_depth: torch.Tensor, tgt_conf: torch.Tensor, ref_cam: torch.Tensor, tgt_cam: torch.Tensor, depth_planes: torch.Tensor, depth_vol: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """Performs a homography warping
    Parameters:
        tgt_depth: 
        tgt_conf: 
        ref_cam:
        tgt_cam:
        depth_planes:
        depth_vol:

    Returns:
        depth_diff:
        warped_conf:
    """
    batch_size, views, height, width = tgt_depth.shape
    # grab intrinsics and extrinsics from reference view
    P_ref = ref_cam[:,0,:,:]
    K_ref = ref_cam[:,1,:,:]
    K_ref[:,3,:] = torch.tensor([0,0,0,1])

    # get intrinsics and extrinsics from target view
    P_tgt = tgt_cam[:,0,:,:]
    K_tgt = tgt_cam[:,1,:,:]
    K_tgt[:,3,:] = torch.tensor([0,0,0,1])

    R_tgt = P_tgt[:,:3,:3]
    t_tgt = P_tgt[:,:3,3:4]
    C_tgt = torch.matmul(-R_tgt.transpose(1,2), t_tgt)
    z_tgt = R_tgt[:,2:3,:3].reshape(batch_size,1,1,1,1,3).repeat(1,depth_planes, height,width,1,1)

    with torch.no_grad():
        # shape camera center vector
        C_tgt = C_tgt.reshape(batch_size,1,1,1,3).repeat(1, depth_planes, height, width, 1)

        bwd_proj = torch.matmul(torch.inverse(P_ref), torch.inverse(K_ref)).to(torch.float32)
        fwd_proj = torch.matmul(K_tgt, P_tgt).to(torch.float32)

        bwd_rot = bwd_proj[:,:3,:3]
        bwd_trans = bwd_proj[:,:3,3:4]

        proj = torch.matmul(fwd_proj, bwd_proj)
        rot = proj[:,:3,:3]
        trans = proj[:,:3,3:4]

        y, x = torch.meshgrid([torch.arange(0, height,dtype=torch.float32,device=tgt_depth.device),
                                     torch.arange(0, width, dtype=torch.float32, device=tgt_depth.device)], indexing='ij')
        y, x = y.contiguous(), x.contiguous()
        y, x = y.reshape(height*width), x.reshape(height*width)
        homog = torch.stack((x,y,torch.ones_like(x)))
        homog = torch.unsqueeze(homog, 0).repeat(batch_size,1,1)

        # get world coords
        world_coords = torch.matmul(bwd_rot, homog)
        world_coords = world_coords.unsqueeze(2).repeat(1,1,depth_planes,1)
        depth_vol = depth_vol.reshape(batch_size,1,depth_planes,-1)
        world_coords = world_coords * depth_vol
        world_coords = world_coords + bwd_trans.reshape(batch_size,3,1,1)
        world_coords = torch.movedim(world_coords, 1, 3)
        world_coords = world_coords.reshape(batch_size, depth_planes, height, width,3)

        # get pixel projection
        rot_coords = torch.matmul(rot, homog)
        rot_coords = rot_coords.unsqueeze(2).repeat(1,1,depth_planes,1)
        proj_3d = rot_coords * depth_vol
        proj_3d = proj_3d + trans.reshape(batch_size,3,1,1)
        proj_2d = proj_3d[:,:2,:,:] / proj_3d[:,2:3,:,:]

        proj_x = proj_2d[:,0,:,:] / ((width-1)/2) - 1
        proj_y = proj_2d[:,1,:,:] / ((height-1)/2) - 1
        proj_2d = torch.stack((proj_x, proj_y), dim=3)
        grid = proj_2d


    proj_depth = torch.sub(world_coords, C_tgt).unsqueeze(-1)
    proj_depth = torch.matmul(z_tgt, proj_depth).reshape(batch_size,depth_planes,height,width)

    warped_depth = F.grid_sample(tgt_depth, grid.reshape(batch_size, depth_planes*height, width, 2), mode='bilinear', padding_mode="zeros", align_corners=False)
    warped_depth = warped_depth.reshape(batch_size, depth_planes, height, width)

    warped_conf = F.grid_sample(tgt_conf, grid.reshape(batch_size, depth_planes*height, width, 2), mode='bilinear', padding_mode="zeros", align_corners=False)
    warped_conf = warped_conf.reshape(batch_size, depth_planes, height, width)

    depth_diff = torch.sub(proj_depth, warped_depth)

    return depth_diff, warped_conf