
cvt.geometry

Module containing geometric routines.

essential_from_features(src_image_file, tgt_image_file, K)

Computes the essential matrix between two images using image features.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| src_image_file | str | Input file for the source image. | required |
| tgt_image_file | str | Input file for the target image. | required |
| K | ndarray | Intrinsics matrix of the two cameras (assumed to be constant between views). | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The essential matrix between the two image views. |

Source code in src/cvt/geometry.py
def essential_from_features(src_image_file: str, tgt_image_file: str, K: np.ndarray) -> np.ndarray:
    """Computes the essential matrix between two images using image features.

    Parameters:
        src_image_file: Input file for the source image.
        tgt_image_file: Input file for the target image.
        K: Intrinsics matrix of the two cameras (assumed to be constant between views).

    Returns:
        The essential matrix between the two image views.
    """
    src_image = cv2.imread(src_image_file)
    tgt_image = cv2.imread(tgt_image_file)

    # compute matching features
    (src_points, tgt_points) = match_features(src_image, tgt_image)

    # compute essential matrix
    E, mask = cv2.findEssentialMat(src_points, tgt_points, K, method=cv2.RANSAC)

    return E
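
A minimal usage sketch (the image paths and intrinsics values below are placeholders, not part of the module):

import numpy as np
from cvt.geometry import essential_from_features

# hypothetical pinhole intrinsics shared by both views
K = np.array([[718.8,   0.0, 607.2],
              [  0.0, 718.8, 185.2],
              [  0.0,   0.0,   1.0]])

# placeholder paths to two overlapping views
E = essential_from_features("view_000.png", "view_001.png", K)
print(E.shape)  # (3, 3)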

fundamental_from_KP(K, P_src, P_tgt)

Computes the fundamental matrix between two images using camera parameters.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| K | ndarray | Intrinsics matrix of the two cameras (assumed to be constant between views). | required |
| P_src | ndarray | Extrinsics matrix for the source view. | required |
| P_tgt | ndarray | Extrinsics matrix for the target view. | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The fundamental matrix between the two cameras. |

Source code in src/cvt/geometry.py
def fundamental_from_KP(K: np.ndarray, P_src: np.ndarray, P_tgt: np.ndarray) -> np.ndarray:
    """Computes the fundamental matrix between two images using camera parameters.

    Parameters:
        K: Intrinsics matrix of the two cameras (assumed to be constant between views).
        P_src: Extrinsics matrix for the source view.
        P_tgt: Extrinsics matrix for the target view.

    Returns:
        The fundamental matrix between the two cameras.
    """
    F_mats = []
    for i in range(K.shape[0]):
        R1 = P_src[i,0:3,0:3]
        t1 = P_src[i,0:3,3]
        R2 = P_tgt[i,0:3,0:3]
        t2 = P_tgt[i,0:3,3]

        t1aug = torch.tensor([t1[0], t1[1], t1[2], 1]).to(K)
        epi2 = torch.matmul(P_tgt[i],t1aug)
        epi2 = torch.matmul(K[i],epi2[0:3])

        R = torch.matmul(R2,torch.t(R1))
        t = t2 - torch.matmul(R,t1)
        K1inv = torch.linalg.inv(K[i])
        K2invT = torch.t(K1inv)
        tx = torch.tensor([[0, -t[2], t[1]], [t[2], 0, -t[0]], [-t[1], t[0], 0]]).to(K)
        F = torch.matmul(K2invT,torch.matmul(tx,torch.matmul(R,K1inv)))
        F = F/(torch.max(F)+1e-10)
        F_mats.append(F)

    return torch.stack(F_mats, dim=0)
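
A small sketch of calling it with batched camera parameters; note that, despite the ndarray annotations, the implementation operates on batched torch tensors (all values below are placeholders):

import torch
from cvt.geometry import fundamental_from_KP

B = 2
K = torch.eye(3).unsqueeze(0).repeat(B, 1, 1)        # [B, 3, 3] intrinsics
P_src = torch.eye(4).unsqueeze(0).repeat(B, 1, 1)    # [B, 4, 4] source extrinsics
P_tgt = torch.eye(4).unsqueeze(0).repeat(B, 1, 1)    # [B, 4, 4] target extrinsics
P_tgt[:, 0, 3] = 0.1                                 # shift the target camera along x

F = fundamental_from_KP(K, P_src, P_tgt)             # [B, 3, 3]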

fundamental_from_features(src_image_file, tgt_image_file)

Computes the fundamental matrix between two images using image features.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| src_image_file | str | Input file for the source image. | required |
| tgt_image_file | str | Input file for the target image. | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The fundamental matrix between the two image views. |

Source code in src/cvt/geometry.py
def fundamental_from_features(src_image_file: str, tgt_image_file: str) -> np.ndarray:
    """Computes the fundamental matrix between two images using image features.

    Parameters:
        src_image_file: Input file for the source image.
        tgt_image_file: Input file for the target image.

    Returns:
        The fundamental matrix between the two image views.
    """
    src_image = cv2.imread(src_image_file)
    tgt_image = cv2.imread(tgt_image_file)

    # compute matching features
    (src_points, tgt_points) = match_features(src_image, tgt_image)

    # Compute fundamental matrix
    F, mask = cv2.findFundamentalMat(src_points,tgt_points,cv2.FM_8POINT)

    return F
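
A minimal usage sketch (placeholder image paths; the 8-point estimate uses the best ORB matches found by match_features):

from cvt.geometry import fundamental_from_features

F = fundamental_from_features("view_000.png", "view_001.png")
print(F.shape)  # (3, 3)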

geometric_consistency_error(src_depth, src_cam, tgt_depth, tgt_cam)

Computes the geometric consistency error between a source and target depth map.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| src_depth | ndarray | Depth map for the source view. | required |
| src_cam | ndarray | Camera parameters for the source depth map viewpoint. | required |
| tgt_depth | ndarray | Depth map for the target view. | required |
| tgt_cam | ndarray | Camera parameters for the target depth map viewpoint. | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The per-pixel re-projection distance error and the projection map computed during re-projection. |

Source code in src/cvt/geometry.py
def geometric_consistency_error(src_depth: np.ndarray, src_cam: np.ndarray, tgt_depth: np.ndarray, tgt_cam: np.ndarray) -> np.ndarray:
    """Computes the geometric consistency error between a source and target depth map.

    Parameters:
        src_depth: Depth map for the source view.
        src_cam: Camera parameters for the source depth map viewpoint.
        tgt_depth: Depth map for the target view.
        tgt_cam: Camera parameters for the target depth map viewpoint.

    Returns:
        The per-pixel re-projection distance error and the projection map computed during re-projection.
    """
    height, width = src_depth.shape
    x_src, y_src = np.meshgrid(np.arange(0, width), np.arange(0, height))

    depth_reprojected, coords_reprojected, coords_tgt, projection_map = reproject(src_depth, src_cam, tgt_depth, tgt_cam)

    dist = np.sqrt((coords_reprojected[:,:,0] - x_src) ** 2 + (coords_reprojected[:,:,1] - y_src) ** 2)

    return dist, projection_map

geometric_consistency_mask(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P, pixel_th)

Computes the geometric consistency mask between a source and target depth map.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| src_depth | Depth map for the source view. | required |
| src_K | Intrinsic camera parameters for the source depth map viewpoint. | required |
| src_P | Extrinsic camera parameters for the source depth map viewpoint. | required |
| tgt_depth | Target depth map used for re-projection. | required |
| tgt_K | Intrinsic camera parameters for the target depth map viewpoint. | required |
| tgt_P | Extrinsic camera parameters for the target depth map viewpoint. | required |
| pixel_th | Pixel re-projection threshold to determine matching depth estimates. | required |

Returns:

The binary consistency mask encoding depth consensus between source and target depth maps.

Source code in src/cvt/geometry.py
def geometric_consistency_mask(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P, pixel_th):
    """Computes the geometric consistency mask between a source and target depth map.

    Parameters:
        src_depth: Depth map for the source view.
        src_K: Intrinsic camera parameters for the source depth map viewpoint.
        src_P: Extrinsic camera parameters for the source depth map viewpoint.
        tgt_depth: Target depth map used for re-projection.
        tgt_K: Intrinsic camera parameters for the target depth map viewpoint.
        tgt_P: Extrinsic camera parameters for the target depth map viewpoint.
        pixel_th: Pixel re-projection threshold to determine matching depth estimates.

    Returns:
        The binary consistency mask encoding depth consensus between source and target depth maps.
    """
    batch_size, c, height, width = src_depth.shape
    depth_reprojected, coords_reprojected, coords_tgt = reproject(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P)

    x_src, y_src = torch.meshgrid(torch.arange(0, width), torch.arange(0, height), indexing="xy")
    x_src = x_src.unsqueeze(0).repeat(batch_size, 1, 1).to(src_depth)
    y_src = y_src.unsqueeze(0).repeat(batch_size, 1, 1).to(src_depth)
    dist = torch.sqrt((coords_reprojected[:,:,:,0] - x_src) ** 2 + (coords_reprojected[:,:,:,1] - y_src) ** 2)

    mask = torch.where(dist < pixel_th, 1, 0)
    return mask
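
A small self-contained sketch with synthetic depth maps and placeholder cameras, showing the expected tensor shapes:

import torch
from cvt.geometry import geometric_consistency_mask

B, H, W = 1, 64, 80
src_depth = torch.full((B, 1, H, W), 2.0)
tgt_depth = torch.full((B, 1, H, W), 2.0)
K = torch.tensor([[100.0, 0.0, W / 2],
                  [0.0, 100.0, H / 2],
                  [0.0,   0.0,   1.0]]).unsqueeze(0)   # [B, 3, 3]
P_src = torch.eye(4).unsqueeze(0)                      # [B, 4, 4]
P_tgt = torch.eye(4).unsqueeze(0)
P_tgt[:, 0, 3] = 0.05                                  # small baseline along x

mask = geometric_consistency_mask(src_depth, K, P_src, tgt_depth, K, P_tgt, pixel_th=1.0)
print(mask.shape, mask.float().mean())                 # [B, H, W], fraction of consistent pixels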

homography(src_image_file, tgt_image_file)

Computes a homography transformation between two images using image features.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| src_image_file | str | Input file for the source image. | required |
| tgt_image_file | str | Input file for the target image. | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The homography matrix to warp the target image to the source image. |

Source code in src/cvt/geometry.py
def homography(src_image_file: str, tgt_image_file: str) -> np.ndarray:
    """Computes a homography transformation between two images using image features.

    Parameters:
        src_image_file: Input file for the source image.
        tgt_image_file: Input file for the target image.

    Returns:
        The homography matrix to warp the target image to the source image.
    """
    src_image = cv2.imread(src_image_file)
    tgt_image = cv2.imread(tgt_image_file)

    (height, width, _) = src_image.shape

    (src_points, tgt_points) = match_features(src_image, tgt_image)

    # compute homography
    H, mask = cv2.findHomography(tgt_points, src_points, method=cv2.RANSAC)

    return H
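
A short sketch that estimates the homography and warps the target image into the source frame (the image paths are placeholders):

import cv2
from cvt.geometry import homography

H = homography("view_000.png", "view_001.png")

src = cv2.imread("view_000.png")
tgt = cv2.imread("view_001.png")
warped_tgt = cv2.warpPerspective(tgt, H, (src.shape[1], src.shape[0]))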

homography_warp(cfg, features, intrinsics, extrinsics, hypotheses, group_channels, vwa_net=None, vis_weights=None, virtual=False)

Performs homography warping to create a Plane Sweeping Volume (PSV).

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| cfg | Configuration dictionary containing configuration parameters. | required |
| features | Feature maps to be warped into a PSV. | required |
| intrinsics | Intrinsics matrices for all views. | required |
| extrinsics | Extrinsics matrices for all views. | required |
| hypotheses | Depth hypotheses to use for homography warping. | required |
| group_channels | Feature channel sizes used in group-wise correlation (GWC). | required |
| vwa_net | Network used for visibility weighting. | None |
| vis_weights | Pre-computed visibility weights. | None |

Returns:

The Plane Sweeping Volume computed via feature matching cost, along with the list of per-view visibility weights.

Source code in src/cvt/geometry.py
def homography_warp(cfg, features, intrinsics, extrinsics, hypotheses, group_channels, vwa_net=None, vis_weights=None, virtual=False):
    """Performs homography warping to create a Plane Sweeping Volume (PSV).
    Parameters:
        cfg: Configuration dictionary containing configuration parameters.
        features: Feature maps to be warped into a PSV.
        intrinsics: Intrinsics matrices for all views.
        extrinsics: Extrinsics matrices for all views.
        hypotheses: Depth hypotheses to use for homography warping.
        group_channels: Feature channel sizes used in group-wise correlation (GWC).
        vwa_net: Network used for visibility weighting.
        vis_weights: Pre-computed visibility weights.

    Returns:
        The Plane Sweeping Volume computed via feature matching cost, along with the list of per-view visibility weights.
    """
    hypotheses = hypotheses.squeeze(1)
    _,planes,_,_ = hypotheses.shape
    batch_size, C, height, width = features[0].shape
    num_views = len(features)
    device = features[0].device

    if not virtual:
        ref_volume = features[0].unsqueeze(2).repeat(1,1,planes,1,1)

    vis_weight_list = []
    cost_volume = torch.zeros((batch_size,group_channels,planes,height,width), dtype=torch.float32, device=device)
    reweight_sum = torch.zeros((batch_size,1,planes,height,width), dtype=torch.float32, device=device)

    # build reference projection matrix
    ref_proj = torch.matmul(intrinsics[:,0], extrinsics[:,0,0:3])
    last = torch.tensor([[[0,0,0,1.0]]]).repeat(batch_size, 1, 1).cuda()
    ref_proj = torch.cat((ref_proj, last), 1)

    # build coordinates grid
    y, x = torch.meshgrid([torch.arange(0, height, dtype=torch.float32, device=device),
                        torch.arange(0, width, dtype=torch.float32, device=device)],
                        indexing='ij')
    y, x = y.contiguous(), x.contiguous()
    y, x = y.view(height * width), x.view(height * width)
    xyz = torch.stack((x, y, torch.ones_like(x)))
    xyz = torch.unsqueeze(xyz, 0).repeat(batch_size, 1, 1)

    for v in range(1,num_views):
        with torch.no_grad():
            # build source projection matrix
            src_proj = torch.matmul(intrinsics[:,v], extrinsics[:,v,0:3])
            src_proj = torch.cat((src_proj, last), 1)

            # compute full projection matrix between views
            proj = torch.matmul(src_proj, torch.inverse(ref_proj))
            rot = proj[:, :3, :3]
            trans = proj[:, :3, 3:4]

            # Build plane-sweeping coordinates grid between views
            rot_xyz = torch.matmul(rot, xyz)
            rot_depth_xyz = rot_xyz.unsqueeze(2).repeat(1, 1, planes, 1) * hypotheses.view(batch_size, 1, planes, height*width)
            proj_xyz = rot_depth_xyz + trans.view(batch_size, 3, 1, 1)
            proj_xy = proj_xyz[:, :2] / proj_xyz[:, 2:3]
            proj_x_normalized = proj_xy[:, 0] / ((width - 1) / 2) - 1
            proj_y_normalized = proj_xy[:, 1] / ((height - 1) / 2) - 1
            proj_xy = torch.stack((proj_x_normalized, proj_y_normalized), dim=3)
            grid = proj_xy
            grid = grid.type(torch.float32)

        src_feature = features[v]
        warped_features = F.grid_sample( src_feature,
                                        grid.view(batch_size, planes * height, width, 2), 
                                        mode='bilinear',
                                        padding_mode='zeros',
                                        align_corners=False)
        warped_features = warped_features.view(batch_size, C, planes, height, width)

        if virtual:
            if v==1:
                # use the first source view as the reference volume and continue
                # to the next source view to compute the inner product
                ref_volume = warped_features
                continue

        # compute Pairwise Plane-Sweeping Volume using GWC
        ppsv = groupwise_correlation(warped_features, ref_volume, group_channels)
        if vwa_net is not None:
            reweight = vwa_net(ppsv)
            vis_weight_list.append(reweight)
            reweight = reweight.unsqueeze(1)
            ppsv = reweight*ppsv
        elif vis_weights is not None:
            reweight = vis_weights[v-1]
            if reweight.shape[2] < ppsv.shape[3]:
                reweight = F.interpolate(reweight,scale_factor=2,mode='bilinear',align_corners=False)
            vis_weight_list.append(reweight)
            reweight = reweight.unsqueeze(2)
            ppsv = reweight*ppsv

        cost_volume = cost_volume + ppsv
        reweight_sum = reweight_sum + reweight

        if cfg["mode"]=="inference":
            del src_feature
            del ppsv
            del warped_features
            del reweight
            torch.cuda.empty_cache()

    cost_volume = cost_volume/(reweight_sum+1e-10)

    return cost_volume, vis_weight_list

homography_warp_var(cfg, features, ref_in, src_in, ref_ex, src_ex, depth_hypos)

Performs homography warping to create a Plane Sweeping Volume (PSV).

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| cfg | Configuration dictionary containing configuration parameters. | required |
| features | Feature maps to be warped into a PSV. | required |
| ref_in | Reference view intrinsics matrix. | required |
| src_in | Source view intrinsics matrices. | required |
| ref_ex | Reference view extrinsics matrix. | required |
| src_ex | Source view extrinsics matrices. | required |
| depth_hypos | Depth hypotheses to use for homography warping. | required |

Returns:

The Plane Sweeping Volume computed via feature matching cost.

Source code in src/cvt/geometry.py
def homography_warp_var(cfg, features, ref_in, src_in, ref_ex, src_ex, depth_hypos):
    """Performs homography warping to create a Plane Sweeping Volume (PSV).
    Parameters:
        cfg: Configuration dictionary containing configuration parameters.
        features: Feature maps to be warped into a PSV.
        ref_in: Reference view intrinsics matrix.
        src_in: Source view intrinsics matrices.
        ref_ex: Reference view extrinsics matrix.
        src_ex: Source view extrinsics matrices.
        depth_hypos: Depth hypotheses to use for homography warping.

    Returns:
        The Plane Sweeping Volume computed via feature matching cost.
    """
    depth_hypos = depth_hypos.squeeze(1)
    _,planes,_,_ = depth_hypos.shape

    B,fCH,H,W = features[0].shape
    num_depth = depth_hypos.shape[1]
    nSrc = len(features)-1

    vis_weight_list = []
    ref_volume = features[0].unsqueeze(2).repeat(1,1,num_depth,1,1)

    cost_volume = torch.zeros((nSrc+1,B,fCH,planes,H,W)).to(features[0])
    cost_volume[0] = ref_volume
    reweight_sum = None
    for src in range(nSrc):
        with torch.no_grad():
            with autocast(enabled=False):
                src_proj = torch.matmul(src_in[:,src,:,:],src_ex[:,src,0:3,:])
                ref_proj = torch.matmul(ref_in,ref_ex[:,0:3,:])
                last = torch.tensor([[[0,0,0,1.0]]]).repeat(len(src_in),1,1).cuda()
                src_proj = torch.cat((src_proj,last),1)
                ref_proj = torch.cat((ref_proj,last),1)

                proj = torch.matmul(src_proj, torch.inverse(ref_proj))
                rot = proj[:, :3, :3]  # [B,3,3]
                trans = proj[:, :3, 3:4]  # [B,3,1]

                y, x = torch.meshgrid([torch.arange(0, H, dtype=torch.float32, device=ref_volume.device),
                                    torch.arange(0, W, dtype=torch.float32, device=ref_volume.device)],
                                    indexing='ij')
                y, x = y.contiguous(), x.contiguous()
                y, x = y.view(H * W), x.view(H * W)
                xyz = torch.stack((x, y, torch.ones_like(x)))  # [3, H*W]
                xyz = torch.unsqueeze(xyz, 0).repeat(B, 1, 1)  # [B, 3, H*W]
                rot_xyz = torch.matmul(rot, xyz)  # [B, 3, H*W]

                rot_depth_xyz = rot_xyz.unsqueeze(2).repeat(1, 1, num_depth, 1) * depth_hypos.view(B, 1, num_depth,H*W)  # [B, 3, Ndepth, H*W]
                proj_xyz = rot_depth_xyz + trans.view(B, 3, 1, 1)  # [B, 3, Ndepth, H*W]
                proj_xy = proj_xyz[:, :2, :, :] / proj_xyz[:, 2:3, :, :]  # [B, 2, Ndepth, H*W]
                proj_x_normalized = proj_xy[:, 0, :, :] / ((W - 1) / 2) - 1
                proj_y_normalized = proj_xy[:, 1, :, :] / ((H - 1) / 2) - 1
                proj_xy = torch.stack((proj_x_normalized, proj_y_normalized), dim=3)  # [B, Ndepth, H*W, 2]
                grid = proj_xy

        grid = grid.type(ref_volume.dtype)
        warped_src_fea = F.grid_sample( features[src+1],
                                        grid.view(B, num_depth * H, W, 2), 
                                        mode='bilinear',
                                        padding_mode='zeros',
                                        align_corners=False)
        cost_volume[src+1] = warped_src_fea.view(B, fCH, num_depth, H, W)

        torch.cuda.empty_cache()

    cost_volume = torch.var(cost_volume, dim=0)
    B,C,D,H,W = cost_volume.shape

    return cost_volume

match_features(src_image, tgt_image, max_features=500)

Computes matching ORB features between a pair of images.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| src_image | ndarray | The source image to compute and match features. | required |
| tgt_image | ndarray | The target image to compute and match features. | required |
| max_features | int | The maximum number of features to retain. | 500 |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| src_points | ndarray | The set of matched point coordinates for the source image. |
| tgt_points | ndarray | The set of matched point coordinates for the target image. |

Source code in src/cvt/geometry.py
def match_features(src_image: np.ndarray, tgt_image: np.ndarray, max_features: int = 500) -> Tuple[np.ndarray, np.ndarray]:
    """Computer matching ORB features between a pair of images.

    Args:
        src_image: The source image to compute and match features.
        tgt_image: The target image to compute and match features.
        max_features: The maximum number of features to retain.

    Returns:
        src_points: The set of matched point coordinates for the source image.
        tgt_points: The set of matched point coordinates for the target image.
    """
    src_image = cv2.cvtColor(src_image,cv2.COLOR_BGR2GRAY)
    tgt_image = cv2.cvtColor(tgt_image, cv2.COLOR_BGR2GRAY)

    orb = cv2.ORB_create(max_features)

    src_keypoints, src_descriptors = orb.detectAndCompute(src_image,None)
    tgt_keypoints, tgt_descriptors = orb.detectAndCompute(tgt_image,None)

    # ORB produces binary descriptors, so match with Hamming distance
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = list(matcher.match(src_descriptors, tgt_descriptors))
    matches.sort(key=lambda x: x.distance)

    # keep the coordinates of the (up to) 8 best matches
    src_points = []
    tgt_points = []
    for m in matches[:8]:
        src_points.append(src_keypoints[m.queryIdx].pt)
        tgt_points.append(tgt_keypoints[m.trainIdx].pt)
    src_points  = np.asarray(src_points)
    tgt_points = np.asarray(tgt_points)

    return (src_points, tgt_points)
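
A minimal usage sketch (placeholder image paths):

import cv2
from cvt.geometry import match_features

src = cv2.imread("view_000.png")
tgt = cv2.imread("view_001.png")
src_pts, tgt_pts = match_features(src, tgt, max_features=500)
print(src_pts.shape, tgt_pts.shape)   # up to the 8 best matches are kept, e.g. (8, 2) (8, 2)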

plane_coords(K, P, depth_hypos, H, W)

Computes plane corner coordinates for a set of depth hypotheses (batched PyTorch version).

Source code in src/cvt/geometry.py
def plane_coords(K, P, depth_hypos, H, W):
    """Batched PyTorch version
    """
    batch_size,_,_ = K.shape
    num_planes = depth_hypos.shape[0]

    xyz = torch.movedim(torch.tensor([[0,0,1], [W-1,0,1], [0,H-1,1], [W-1,H-1,1]], dtype=torch.float32), 0, 1).to(P)
    xyz = xyz.reshape(1,3,4).repeat(batch_size, 1, 1)
    if K.shape[1]==3:
        K_44 = torch.zeros((batch_size, 4, 4)).to(P)
        K_44[:,:3,:3] = K[:,:3,:3]
        K_44[:,3,3] = 1
        K = K_44
    proj = K @ P
    inv_proj = torch.linalg.inv(proj)

    planes = torch.zeros(num_planes, 3, 4).to(inv_proj)
    for p in range(num_planes):
        planes[p] = (inv_proj[0,:3,:3] @ xyz) * depth_hypos[p]
        planes[p] += inv_proj[0,:3,3:4]

    return planes

project_depth_map(depth, cam, mask=None)

Projects a depth map into a list of 3D points

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| depth | Tensor | Input depth map to project. | required |
| cam | Tensor | Camera parameters for input depth map. | required |
| mask | Tensor | Optional mask selecting which projected points to keep. | None |

Returns:

| Type | Description |
| --- | --- |
| Tensor | A float Tensor of 3D points corresponding to the projected depth values. |

Source code in src/cvt/geometry.py
def project_depth_map(depth: torch.Tensor, cam: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
    """Projects a depth map into a list of 3D points

    Parameters:
        depth: Input depth map to project.
        cam: Camera parameters for input depth map.

    Returns:
        A float Tensor of 3D points corresponding to the projected depth values.
    """
    if (depth.shape[1] == 1):
        depth = depth.squeeze(1)

    batch_size, height, width = depth.shape
    cam_shape = cam.shape

    # get camera extrinsics and intrinsics
    P = cam[:,0,:,:]
    K = cam[:,1,:,:]
    K[:,3,:] = torch.tensor([0,0,0,1])

    # construct back-projection from inverse matrices
    # separate into rotation and translation components
    bwd_projection = torch.matmul(torch.inverse(P), torch.inverse(K)).to(torch.float32)
    bwd_rotation = bwd_projection[:,:3,:3]
    bwd_translation = bwd_projection[:,:3,3:4]

    # build 2D homogeneous coordinates tensor: [B, 3, H*W]
    with torch.no_grad():
        row_span = torch.arange(0, height, dtype=torch.float32).cuda()
        col_span = torch.arange(0, width, dtype=torch.float32).cuda()
        r,c = torch.meshgrid(row_span, col_span, indexing="ij")
        r,c = r.contiguous(), c.contiguous()
        r,c = r.reshape(height*width), c.reshape(height*width)
        coords = torch.stack((c,r,torch.ones_like(c)))
        coords = torch.unsqueeze(coords, dim=0).repeat(batch_size, 1, 1)

    # compute 3D coordinates using the depth map: [B, H*W, 3]
    world_coords = torch.matmul(bwd_rotation, coords)
    depth = depth.reshape(batch_size, 1, -1)
    world_coords = world_coords * depth
    world_coords = world_coords + bwd_translation

    #TODO: make sure index select is differentiable
    #       (there is a backward function but need to find the code..)
    if mask is not None:
        # select only the points kept by the mask (assumes a 2D mask over the pixel grid)
        non_zero_inds = torch.nonzero(mask.reshape(-1), as_tuple=False).squeeze(1)
        world_coords = torch.index_select(world_coords, dim=2, index=non_zero_inds)
        world_coords = torch.movedim(world_coords, 1, 2)

    # reshape 3D coordinates back into 2D map: [B, H, W, 3]
    #   coords_map = world_coords.reshape(batch_size, height, width, 3)

    return world_coords
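
A small sketch with a synthetic depth map and a 2x4x4 camera tensor (extrinsics in cam[:, 0], intrinsics in cam[:, 1]); the intrinsics values are placeholders and a CUDA device is assumed because the routine builds its pixel grid with .cuda():

import torch
from cvt.geometry import project_depth_map

B, H, W = 1, 32, 40
depth = torch.full((B, 1, H, W), 3.0).cuda()

cam = torch.zeros((B, 2, 4, 4)).cuda()
cam[:, 0] = torch.eye(4)           # extrinsics
cam[:, 1, 0, 0] = 100.0            # fx
cam[:, 1, 1, 1] = 100.0            # fy
cam[:, 1, 0, 2] = W / 2            # cx
cam[:, 1, 1, 2] = H / 2            # cy
cam[:, 1, 2, 2] = 1.0

points = project_depth_map(depth, cam)   # [B, 3, H*W] 3D points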

project_renderer(renderer, K, P, width, height)

Projects the scene in an Open3D Offscreen Renderer to the 2D image plane.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| renderer | OffscreenRenderer | Geometric scene to be projected. | required |
| K | ndarray | Camera intrinsic parameters. | required |
| P | ndarray | Camera extrinsic parameters. | required |
| width | float | Desired image width. | required |
| height | float | Desired image height. | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The rendered image for the scene at the specified camera viewpoint. |

Source code in src/cvt/geometry.py
def project_renderer(renderer: o3d.visualization.rendering.OffscreenRenderer, K: np.ndarray, P: np.ndarray, width: float, height: float) -> np.ndarray:
    """Projects the scene in an Open3D Offscreen Renderer to the 2D image plane.

    Parameters:
        renderer: Geometric scene to be projected.
        K: Camera intrinsic parameters.
        P: Camera extrinsic parameters.
        width: Desired image width.
        height: Desired image height.

    Returns:
        The rendered image for the scene at the specified camera viewpoint.
    """
    # set up the renderer
    intrins = o3d.camera.PinholeCameraIntrinsic(width, height, K[0,0], K[1,1], K[0,2], K[1,2])
    renderer.setup_camera(intrins, P)

    # render image
    image = np.asarray(renderer.render_to_image())
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    return image

psv(cfg, images, intrinsics, extrinsics, depth_hypos)

Performs homography warping to create a Plane Sweeping Volume (PSV).

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| cfg | Configuration dictionary containing configuration parameters. | required |
| images | Image maps to be warped into a PSV. | required |
| intrinsics | Intrinsics matrices for all views. | required |
| extrinsics | Extrinsics matrices for all views. | required |
| depth_hypos | Depth hypotheses to use for homography warping. | required |

Returns:

The list of pairwise plane-sweeping volumes built by warping each source image toward the reference view.

Source code in src/cvt/geometry.py
def psv(cfg, images, intrinsics, extrinsics, depth_hypos):
    """Performs homography warping to create a Plane Sweeping Volume (PSV).
    Parameters:
        cfg: Configuration dictionary containing configuration parameters.
        images: image maps to be warped into a PSV.
        intrinsics: intrinsics matrices.
        extrinsics: extrinsics matrices.
        depth_hypos: Depth hypotheses to use for homography warping.

    Returns:
        The list of pairwise plane-sweeping volumes built by warping each source image toward the reference view.
    """
    depth_hypos = depth_hypos.squeeze(1)
    _,planes,_,_ = depth_hypos.shape
    B,views,C,H,W = images.shape

    pairwise_psv = []
    for v in range(1,views):
        with torch.no_grad():
            src_proj = torch.matmul(intrinsics[:,v], extrinsics[:,v,0:3])
            ref_proj = torch.matmul(intrinsics[:,0], extrinsics[:,0,0:3])
            last = torch.tensor([[[0,0,0,1.0]]]).repeat(B,1,1).to(images.device)
            src_proj = torch.cat((src_proj,last),1)
            ref_proj = torch.cat((ref_proj,last),1)

            proj = torch.matmul(src_proj, torch.inverse(ref_proj))
            rot = proj[:, :3, :3]  # [B,3,3]
            trans = proj[:, :3, 3:4]  # [B,3,1]

            y, x = torch.meshgrid([torch.arange(0, H, dtype=torch.float32, device=images.device),
                                torch.arange(0, W, dtype=torch.float32, device=images.device)],
                                indexing='ij')
            y, x = y.contiguous(), x.contiguous()
            y, x = y.view(H * W), x.view(H * W)
            xyz = torch.stack((x, y, torch.ones_like(x)))  # [3, H*W]
            xyz = torch.unsqueeze(xyz, 0).repeat(B, 1, 1)  # [B, 3, H*W]
            rot_xyz = torch.matmul(rot, xyz)  # [B, 3, H*W]

            rot_depth_xyz = rot_xyz.unsqueeze(2).repeat(1, 1, planes, 1) * depth_hypos.view(B, 1, planes, H*W)  # [B, 3, Ndepth, H*W]
            proj_xyz = rot_depth_xyz + trans.view(B, 3, 1, 1)  # [B, 3, Ndepth, H*W]
            proj_xy = proj_xyz[:, :2, :, :] / proj_xyz[:, 2:3, :, :]  # [B, 2, Ndepth, H*W]
            proj_x_normalized = proj_xy[:, 0, :, :] / ((W - 1) / 2) - 1
            proj_y_normalized = proj_xy[:, 1, :, :] / ((H - 1) / 2) - 1
            proj_xy = torch.stack((proj_x_normalized, proj_y_normalized), dim=3)  # [B, Ndepth, H*W, 2]
            grid = proj_xy
        grid = grid.type(images.dtype)

        warped_src_image = F.grid_sample( images[:,v],
                                        grid.view(B, planes * H, W, 2), 
                                        mode='bilinear',
                                        padding_mode='zeros',
                                        align_corners=False)
        pairwise_psv.append(warped_src_image.view(B, C, planes, H, W))

    return pairwise_psv
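
A self-contained sketch with random images and placeholder cameras, showing the expected shapes of the pairwise plane-sweeping volumes (an empty cfg dictionary is passed as a placeholder):

import torch
from cvt.geometry import psv

B, V, C, H, W, D = 1, 3, 3, 32, 40, 8
images = torch.rand(B, V, C, H, W)                       # reference view first, then source views
K = torch.tensor([[100.0, 0.0, W / 2],
                  [0.0, 100.0, H / 2],
                  [0.0,   0.0,   1.0]])
intrinsics = K.reshape(1, 1, 3, 3).repeat(B, V, 1, 1)    # [B, V, 3, 3]
extrinsics = torch.eye(4).reshape(1, 1, 4, 4).repeat(B, V, 1, 1)
extrinsics[:, 1, 0, 3] = 0.1                             # shift the source cameras slightly
extrinsics[:, 2, 0, 3] = -0.1
depth_hypos = torch.linspace(1.0, 5.0, D).reshape(1, 1, D, 1, 1).repeat(B, 1, 1, H, W)

pairwise = psv({}, images, intrinsics, extrinsics, depth_hypos)
print(len(pairwise), pairwise[0].shape)                  # V-1 volumes, each [B, C, D, H, W]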

render_custom_values(points, values, image_shape, cam)

Renders a point cloud into a 2D camera plane using custom values for each pixel.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| points | ndarray | List of 3D points to be rendered. | required |
| values | ndarray | List of values to be written in the rendered image. | required |
| image_shape | Tuple[int, int] | Desired shape (height, width) of the rendered image. | required |
| cam | ndarray | Camera parameters for the image viewpoint. | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The rendered image for the list of points using the specified corresponding values. |

Source code in src/cvt/geometry.py
def render_custom_values(points: np.ndarray, values: np.ndarray, image_shape: Tuple[int,int], cam: np.ndarray) -> np.ndarray:
    """Renders a point cloud into a 2D camera plane using custom values for each pixel.

    Parameters:
        points: List of 3D points to be rendered.
        values: List of values to be written in the rendered image.
        image_shape: Desired shape (height,width) of the rendered image.
        cam: Camera parameters for the image viewpoint.

    Returns:
        The rendered image for the list of points using the specified corresponding values.
    """
    points = points.tolist()
    values = list(values.astype(float))
    cam = cam.flatten().tolist()

    rendered_img = rd.render(list(image_shape), points, values, cam)

    return rendered_img

render_point_cloud(render, intrins, pose)

Renders a point cloud into a 2D image plane.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| render | Offscreen renderer containing the scene to be rendered. | required |
| intrins | Camera intrinsic parameters. | required |
| pose | Camera extrinsic parameters. | required |

Returns:

The rendered image and depth map for the scene at the specified camera viewpoint.

Source code in src/cvt/geometry.py
def render_point_cloud(render, intrins, pose):
    """Renders a point cloud into a 2D image plane.

    Parameters:
        render: Offscreen renderer containing the scene to be rendered.
        intrins: Camera intrinsic parameters.
        pose: Camera extrinsic parameters.

    Returns:
        The rendered image and depth map for the scene at the specified camera viewpoint.
    """
    render.setup_camera(intrins, pose)

    # render image
    image = np.asarray(render.render_to_image())
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    depth = np.asarray(render.render_to_depth_image(z_in_view_space=True))

    return image, depth

render_point_cloud_single(cloud, pose, K, width, height)

Renders a point cloud into a 2D image plane.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| cloud | PointCloud | Point cloud to be rendered. | required |
| pose | ndarray | Camera extrinsic parameters for the image plane. | required |
| K | ndarray | Camera intrinsic parameters for the image plane. | required |
| width | int | Desired width of the rendered image. | required |
| height | int | Desired height of the rendered image. | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The rendered image and depth map for the point cloud at the specified camera viewpoint. |

Source code in src/cvt/geometry.py
def render_point_cloud_single(cloud: o3d.geometry.PointCloud, pose: np.ndarray, K: np.ndarray, width: int, height: int) -> np.ndarray:
    """Renders a point cloud into a 2D image plane.

    Parameters:
        cloud: Point cloud to be rendered.
        pose: Camera extrinsic parameters for the image plane.
        K: Camera intrinsic parameters for the image plane.
        width: Desired width of the rendered image.
        height: Desired height of the rendered image.

    Returns:
        The rendered image and depth map for the point cloud at the specified camera viewpoint.
    """
    #   cmap = plt.get_cmap("hot_r")
    #   colors = cmap(dists)[:, :3]
    #   ply.colors = o3d.utility.Vector3dVector(colors)

    # set up the renderer
    render = o3d.visualization.rendering.OffscreenRenderer(width, height)
    mat = o3d.visualization.rendering.MaterialRecord()
    mat.shader = 'defaultUnlit'
    render.scene.add_geometry("cloud", cloud, mat)
    render.scene.set_background(np.asarray([0,0,0,1])) #r,g,b,a
    intrins = o3d.camera.PinholeCameraIntrinsic(width, height, K[0,0], K[1,1], K[0,2], K[1,2])
    render.setup_camera(intrins, pose)

    # render image
    image = np.asarray(render.render_to_image())
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    depth = np.asarray(render.render_to_depth_image(z_in_view_space=True))

    return image, depth
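
A usage sketch, assuming an environment where Open3D offscreen rendering is available (the point-cloud path and camera values are placeholders):

import numpy as np
import open3d as o3d
from cvt.geometry import render_point_cloud_single

cloud = o3d.io.read_point_cloud("fused_cloud.ply")     # placeholder point cloud
K = np.array([[525.0,   0.0, 320.0],
              [  0.0, 525.0, 240.0],
              [  0.0,   0.0,   1.0]])
pose = np.eye(4)                                       # world-to-camera extrinsics

image, depth = render_point_cloud_single(cloud, pose, K, width=640, height=480)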

reproject(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P)

Computes the re-projection depth values and pixel indices between two depth maps.

This function takes as input two depth maps: 'src_depth' and 'tgt_depth'. The source depth map is first projected into the target camera plane using the source depth values and the camera parameters for both views. Using the projected pixel coordinates in the target view, the target depths are then re-projected back into the source camera plane (again with the camera parameters for both views). The information produced by this process is often used to compute re-projection errors between two depth maps, or for similar operations.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| src_depth | Source depth map to be projected. | required |
| src_K | Intrinsic camera parameters for the source depth map viewpoint. | required |
| src_P | Extrinsic camera parameters for the source depth map viewpoint. | required |
| tgt_depth | Target depth map used for re-projection. | required |
| tgt_K | Intrinsic camera parameters for the target depth map viewpoint. | required |
| tgt_P | Extrinsic camera parameters for the target depth map viewpoint. | required |

Returns:

| Name | Description |
| --- | --- |
| depth_reprojected | The re-projected depth values for the source depth map. |
| coords_reprojected | The re-projection coordinates for the source view. |
| coords_tgt | The projected coordinates for the target view. |

Source code in src/cvt/geometry.py
def reproject(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P):
    """Computes the re-projection depth values and pixel indices between two depth maps.

    This function takes as input two depth maps: 'src_depth' and 'tgt_depth'. The source
    depth map is first projected into the target camera plane using the source depth
    values and the camera parameters for both views. Using the projected pixel
    coordinates in the target view, the target depths are then re-projected back into
    the source camera plane (again with the camera parameters for both views). The
    information produced by this process is often used to compute re-projection
    errors between two depth maps, or for similar operations.

    Parameters:
        src_depth: Source depth map to be projected.
        src_K: Intrinsic camera parameters for the source depth map viewpoint.
        src_P: Extrinsic camera parameters for the source depth map viewpoint.
        tgt_depth: Target depth map used for re-projection.
        tgt_K: Intrinsic camera parameters for the target depth map viewpoint.
        tgt_P: Extrinsic camera parameters for the target depth map viewpoint.

    Returns:
        depth_reprojected: The re-projected depth values for the source depth map.
        coords_reprojected: The re-projection coordinates for the source view.
        coords_tgt: The projected coordinates for the target view.
    """
    batch_size, c, height, width = src_depth.shape

    # back-project ref depths to 3D
    x_src, y_src = torch.meshgrid(torch.arange(0, width), torch.arange(0, height), indexing="xy")
    x_src = x_src.reshape(-1).unsqueeze(0).repeat(batch_size, 1).to(src_depth)
    y_src = y_src.reshape(-1).unsqueeze(0).repeat(batch_size, 1).to(src_depth)
    homog = torch.stack((x_src, y_src, torch.ones_like(x_src)), dim=1)
    xyz_src = torch.matmul(torch.linalg.inv(src_K), homog * src_depth.reshape(batch_size, 1, -1))

    # transform 3D points from ref to src coords
    homog_3d = torch.concatenate((xyz_src, torch.ones_like(x_src).unsqueeze(1)), dim=1)
    xyz_tgt = torch.matmul(torch.matmul(tgt_P, torch.linalg.inv(src_P)), homog_3d)[:,:3]

    # project src 3D points into pixel coords
    K_xyz_tgt = torch.matmul(tgt_K, xyz_tgt)
    xy_tgt = K_xyz_tgt[:,:2] / K_xyz_tgt[:,2:3]
    x_tgt = xy_tgt[:,0].reshape(batch_size, height, width).to(torch.float32)
    y_tgt = xy_tgt[:,1].reshape(batch_size, height, width).to(torch.float32)
    coords_tgt = torch.stack((x_tgt, y_tgt), dim=-1) # B x H x W x 2

    # sample the depth values from the src map at each pixel coord
    x_normalized = ((x_tgt / (width-1)) * 2) - 1
    y_normalized = ((y_tgt / (height-1)) * 2) - 1
    grid = torch.stack((x_normalized, y_normalized), dim=-1) # B x H x W x 2
    sampled_depth_tgt = F.grid_sample(
                                    tgt_depth,
                                    grid,
                                    mode="bilinear",
                                    padding_mode="zeros",
                                    align_corners=False)

    # back-project src depths to 3D
    homog = torch.concatenate((xy_tgt, torch.ones_like(x_src).unsqueeze(1)), dim=1)
    xyz_tgt = torch.matmul(torch.linalg.inv(tgt_K), homog * sampled_depth_tgt.reshape(batch_size, 1, -1))

    # transform 3D points from src to ref coords
    homog_3d = torch.concatenate((xyz_tgt, torch.ones_like(x_src).unsqueeze(1)), dim=1)
    xyz_reprojected = torch.matmul(torch.matmul(src_P, torch.linalg.inv(tgt_P)), homog_3d)[:,:3]

    # extract reprojected depth values
    depth_reprojected = xyz_reprojected[:,2].reshape(batch_size, height, width).to(torch.float32)

    # project ref 3D points into pixel coords
    K_xyz_reprojected = torch.matmul(src_K, xyz_reprojected)
    xy_reprojected = K_xyz_reprojected[:,:2] / (K_xyz_reprojected[:,2:3] + 1e-7)
    x_reprojected = xy_reprojected[:,0].reshape(batch_size, height, width).to(torch.float32)
    y_reprojected = xy_reprojected[:,1].reshape(batch_size, height, width).to(torch.float32)

    coords_reprojected = torch.stack((x_reprojected, y_reprojected), dim=-1) # B x H x W x 2

    return depth_reprojected, coords_reprojected, coords_tgt
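
A small synthetic sketch (placeholder depths and cameras) illustrating the input and output shapes:

import torch
from cvt.geometry import reproject

B, H, W = 1, 48, 64
src_depth = torch.full((B, 1, H, W), 2.0)
tgt_depth = torch.full((B, 1, H, W), 2.0)
K = torch.tensor([[100.0, 0.0, W / 2],
                  [0.0, 100.0, H / 2],
                  [0.0,   0.0,   1.0]]).unsqueeze(0)
P_src = torch.eye(4).unsqueeze(0)
P_tgt = torch.eye(4).unsqueeze(0)
P_tgt[:, 0, 3] = 0.05

depth_reproj, coords_reproj, coords_tgt = reproject(src_depth, K, P_src, tgt_depth, K, P_tgt)
# depth_reproj: [B, H, W]; coords_reproj and coords_tgt: [B, H, W, 2]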

resolution_based_hypothesis(data, target_hypo, level, focal_length, min_hypo, max_hypo, delta_in=1)

Parameters:

Returns:

Source code in src/cvt/geometry.py
def resolution_based_hypothesis(data, target_hypo, level, focal_length, min_hypo, max_hypo, delta_in=1):
    """
    Parameters:

    Returns:
    """
    B, _, D, H, W = target_hypo.shape
    M = D  # assumed sample count per depth region; M is not defined in the original signature
    rand_match_offset = torch.rand(B,1,M,H,W).to(target_hypo)
    rand_near_offset = torch.rand(B,1,M,H,W).to(target_hypo)
    rand_far_offset = torch.rand(B,1,M,H,W).to(target_hypo)

    near, far = Z_from_disp(target_hypo, data["baseline"], focal_length, delta=delta_in)
    target_range = torch.abs(far - near).repeat(1,1,M,1,1)
    near_range = torch.abs(near - min_hypo).repeat(1,1,M,1,1)
    far_range = torch.abs(max_hypo - far).repeat(1,1,M,1,1)

    target_samples = (rand_match_offset * target_range) + near
    near_samples = (rand_near_offset * near_range) + min_hypo
    far_samples = (rand_far_offset * far_range) + far
    samples = torch.cat([target_samples,near_samples,far_samples], dim=1)
    samples = samples.reshape(B,-1,H,W).unsqueeze(1) # [B, 1, M*3, H, W]

    mask = torch.where(target_hypo <= 0, 0.0, 1.0).repeat(1,1,M*3,1,1)
    hypos = torch.clip(samples, min_hypo, max_hypo) * mask

    return hypos

sample_volume(volume, z_vals, coords, H, W, near_depth, far_depth, inv_depth)

Parameters:

Returns:

Source code in src/cvt/geometry.py
def sample_volume(volume, z_vals, coords, H, W, near_depth, far_depth, inv_depth):
    """
    Parameters:

    Returns:
    """
    N, M = z_vals.shape
    batch_size, c, _, _, _ = volume.shape

    z_vals = z_vals.reshape(N,M,1) # N x M x 1
    if inv_depth:
        z_vals = 1/z_vals
        near_depth = 1/near_depth
        far_depth = 1/far_depth
    coords = coords.reshape(N,1,2).repeat(1,M,1) # N x M x 2
    x_coords = coords[:,:,1:2]
    y_coords = coords[:,:,0:1]
    points = torch.cat([x_coords, y_coords, z_vals], dim=-1) # N x M x 3
    points = torch.reshape(points, [-1, 3]) # N*M x 3

    # define coordinates bounds
    min_coord = torch.tensor([0,0,near_depth]).to(points)
    max_coord = torch.tensor([W-1,H-1,far_depth]).to(points)
    min_coord = min_coord.reshape(1,3).repeat(N*M,1)
    max_coord = max_coord.reshape(1,3).repeat(N*M,1) 

    # normalize points
    norm_points = (points - min_coord) / (max_coord - min_coord)
    norm_points = norm_points.unsqueeze(0).repeat(batch_size, 1, 1)
    norm_points = (norm_points * 2) - 1

    # Note: The shape of the input volume is 5D: Batch x Channels x Depth x Height x Width.
    #       The input coordinates must be in [x, y, z] format where x->width, y->height, z->depth.
    #       These coordinates must be normalized between [-1, 1].
    features = F.grid_sample(volume,
                            norm_points.view(batch_size, N*M, 1, 1, 3),
                            mode='bilinear',
                            padding_mode='zeros',
                            align_corners=True)
    features = torch.movedim(features.reshape(c, N*M), 0, 1) # N*M x c

    return features

soft_hypothesis(data, target_hypo, focal_length, min_hypo, max_hypo, M, delta_in=1)

Parameters:

Returns:

Source code in src/cvt/geometry.py
def soft_hypothesis(data, target_hypo, focal_length, min_hypo, max_hypo, M, delta_in=1):
    """
    Parameters:

    Returns:
    """
    B, _, D, H, W = target_hypo.shape
    rand_match_offset = torch.rand(B,1,M,H,W).to(target_hypo)
    near, far = Z_from_disp(target_hypo, data["baseline"], focal_length, delta=delta_in)
    target_range = torch.abs(far - near).repeat(1,1,M,1,1)

    target_samples = (rand_match_offset * target_range) + near
    mask = torch.where(target_hypo <= 0, 0.0, 1.0).repeat(1,1,M,1,1)
    matching_hypos = torch.clip(target_samples, min_hypo, max_hypo) * mask

    return matching_hypos

uniform_hypothesis(cfg, device, batch_size, depth_min, depth_max, img_height, img_width, planes, inv_depth=False, bin_format=False)

Parameters:

Returns:

Source code in src/cvt/geometry.py
def uniform_hypothesis(cfg, device, batch_size, depth_min, depth_max, img_height, img_width, planes, inv_depth=False, bin_format=False):
    """
    Parameters:

    Returns:
    """
    depth_range = depth_max-depth_min

    hypotheses = torch.zeros((batch_size,planes),device=device)
    for b in range(0,batch_size):
        if bin_format:
            spacing = depth_range/planes
            start_depth = depth_min + (spacing/2)
            end_depth = depth_min + (spacing/2) + ((planes-1)*spacing)
        else:
            start_depth = depth_min
            end_depth = depth_max
        if inv_depth:
            hypotheses[b] = 1/(torch.linspace(1/start_depth,1/end_depth,steps=planes,device=device))
        else:
            hypotheses[b] = torch.linspace(start_depth, end_depth, steps=planes,device=device)
    hypotheses = hypotheses.unsqueeze(2).unsqueeze(3).repeat(1,1,img_height,img_width)

    # Make coordinate for depth hypothesis, to be used by sparse convolution.
    depth_hypo_coords = torch.zeros((batch_size,planes),device=device)
    for b in range(0,batch_size):
        depth_hypo_coords[b] = torch.linspace(0,planes-1,steps=planes,device=device)
    depth_hypo_coords = depth_hypo_coords.unsqueeze(2).unsqueeze(3).repeat(1,1,img_height,img_width)

    # Calculate hypothesis interval
    hypo_intervals = hypotheses[:,1:]-hypotheses[:,:-1]
    hypo_intervals = torch.cat((hypo_intervals,hypo_intervals[:,-1].unsqueeze(1)),dim=1)

    return hypotheses.unsqueeze(1), depth_hypo_coords.unsqueeze(1), hypo_intervals.unsqueeze(1)
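
A quick sketch of generating evenly spaced hypotheses (the cfg dictionary is a placeholder):

import torch
from cvt.geometry import uniform_hypothesis

hypos, coords, intervals = uniform_hypothesis(
    cfg={}, device=torch.device("cpu"), batch_size=1,
    depth_min=1.0, depth_max=10.0, img_height=32, img_width=40, planes=8)
print(hypos.shape)   # [1, 1, 8, 32, 40]: evenly spaced depths repeated over the image grid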

visibility(depths, K, Ps, vis_th, levels=4)

Parameters:

Returns:

Source code in src/cvt/geometry.py
def visibility(depths, K, Ps, vis_th, levels=4):
    """
    Parameters:

    Returns:
    """
    batch_size, views, c, H, W = depths.shape

    K_pyr = intrinsic_pyramid(K, levels)

    vis_maps = []
    vis_masks = []
    for l in range(levels):
        resized_depths = tvf.resize(depths[:,:,0], [int(H/(2**l)), int(W/(2**l))]).unsqueeze(2)
        batch_size, views, c, height, width = resized_depths.shape
        vis_map = torch.where(resized_depths[:,0] > 0.0, 1, 0)

        for i in range(1, views):
            mask = geometric_consistency_mask(resized_depths[:,0], K_pyr[:,l], Ps[:,0], resized_depths[:,i], K_pyr[:,l], Ps[:,i], pixel_th=0.5)
            vis_map += mask.unsqueeze(1)
        vis_map = vis_map.to(torch.float32)

        vis_maps.append(vis_map)
        vis_masks.append(torch.where(vis_map >= vis_th, 1, 0))
    return vis_maps, vis_masks

visibility_mask(src_depth, src_cam, depth_files, cam_files, src_ind=-1, pixel_th=0.1)

Computes a visibility mask between a provided source depth map and list of target depth maps.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| src_depth | ndarray | Depth map for the source view. | required |
| src_cam | ndarray | Camera parameters for the source depth map viewpoint. | required |
| depth_files | List[str] | List of target depth maps. | required |
| cam_files | List[str] | List of corresponding target camera parameters for each target depth map viewpoint. | required |
| src_ind | int | Index into 'depth_files' corresponding to the source depth map (if included in the list). | -1 |
| pixel_th | float | Pixel re-projection threshold to determine matching depth estimates. | 0.1 |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The visibility mask for the source view. |

Source code in src/cvt/geometry.py
def visibility_mask(src_depth: np.ndarray, src_cam: np.ndarray, depth_files: List[str], cam_files: List[str], src_ind: int = -1, pixel_th: float = 0.1) -> np.ndarray:
    """Computes a visibility mask between a provided source depth map and list of target depth maps.

    Parameters:
        src_depth: Depth map for the source view.
        src_cam: Camera parameters for the source depth map viewpoint.
        depth_files: List of target depth maps.
        cam_files: List of corresponding target camera parameters for each target depth map viewpoint.
        src_ind: Index into 'depth_files' corresponding to the source depth map (if included in the list).
        pixel_th: Pixel re-projection threshold to determine matching depth estimates.

    Returns:
        The visibility mask for the source view.
    """
    height, width = src_depth.shape
    vis_map = np.not_equal(src_depth, 0.0).astype(np.double)

    for i in range(len(depth_files)):
        if (i==src_ind):
            continue

        # get files
        sdf = depth_files[i]
        scf = cam_files[i]

        # load data
        tgt_depth = read_pfm(sdf)
        tgt_cam = read_single_cam_sfm(scf,'r')

        mask = geometric_consistency_mask(src_depth, src_cam, tgt_depth, tgt_cam, pixel_th)
        vis_map += mask

    return vis_map.astype(np.float32)