
cvt.geometry

Module containing geometric routines.

essential_from_features(src_image_file, tgt_image_file, K)

Computes the essential matrix between two images using image features.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| src_image_file | str | Input file for the source image. | required |
| tgt_image_file | str | Input file for the target image. | required |
| K | ndarray | Intrinsics matrix of the two cameras (assumed to be constant between views). | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The essential matrix between the two image views. |

Source code in src/cvt/geometry.py
def essential_from_features(src_image_file: str, tgt_image_file: str, K: np.ndarray) -> np.ndarray:
    """Computes the essential matrix between two images using image features.

    Parameters:
        src_image_file: Input file for the source image.
        tgt_image_file: Input file for the target image.
        K: Intrinsics matrix of the two cameras (assumed to be constant between views).

    Returns:
        The essential matrix between the two image views.
    """
    src_image = cv2.imread(src_image_file)
    tgt_image = cv2.imread(tgt_image_file)

    # compute matching features
    (src_points, tgt_points) = match_features(src_image, tgt_image)

    # compute essential matrix
    E, mask = cv2.findEssentialMat(src_points, tgt_points, K, method=cv2.RANSAC)

    return E
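
A minimal usage sketch (the image paths and intrinsics values below are placeholders, not part of the module):

import numpy as np
from cvt.geometry import essential_from_features

# hypothetical pinhole intrinsics shared by both views
K = np.array([[718.8,   0.0, 607.2],
              [  0.0, 718.8, 185.2],
              [  0.0,   0.0,   1.0]])

# placeholder paths to two overlapping views
E = essential_from_features("view_000.png", "view_001.png", K)
print(E.shape)  # (3, 3)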

fundamental_from_KP(K, P_src, P_tgt)

Computes the fundamental matrix between two images using camera parameters.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| K | ndarray | Intrinsics matrix of the two cameras (assumed to be constant between views). | required |
| P_src | ndarray | Extrinsics matrix for the source view. | required |
| P_tgt | ndarray | Extrinsics matrix for the target view. | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The fundamental matrix between the two cameras. |

Source code in src/cvt/geometry.py
def fundamental_from_KP(K: np.ndarray, P_src: np.ndarray, P_tgt: np.ndarray) -> np.ndarray:
    """Computes the fundamental matrix between two images using camera parameters.

    Parameters:
        K: Intrinsics matrix of the two cameras (assumed to be constant between views).
        P_src: Extrinsics matrix for the source view.
        P_tgt: Extrinsics matrix for the target view.

    Returns:
        The fundamental matrix between the two cameras.
    """
    F_mats = []
    for i in range(K.shape[0]):
        R1 = P_src[i,0:3,0:3]
        t1 = P_src[i,0:3,3]
        R2 = P_tgt[i,0:3,0:3]
        t2 = P_tgt[i,0:3,3]

        t1aug = torch.tensor([t1[0], t1[1], t1[2], 1]).to(K)
        epi2 = torch.matmul(P_tgt[i],t1aug)
        epi2 = torch.matmul(K[i],epi2[0:3])

        R = torch.matmul(R2,torch.t(R1))
        t = t2 - torch.matmul(R,t1)
        K1inv = torch.linalg.inv(K[i])
        K2invT = torch.t(K1inv)
        tx = torch.tensor([[0, -t[2], t[1]], [t[2], 0, -t[0]], [-t[1], t[0], 0]]).to(K)
        F = torch.matmul(K2invT,torch.matmul(tx,torch.matmul(R,K1inv)))
        F = F/(torch.max(F)+1e-10)
        F_mats.append(F)

    return torch.stack(F_mats, dim=0)
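
A small sketch of calling it with batched camera parameters; note that, despite the ndarray annotations, the implementation operates on batched torch tensors (all values below are placeholders):

import torch
from cvt.geometry import fundamental_from_KP

B = 2
K = torch.eye(3).unsqueeze(0).repeat(B, 1, 1)        # [B, 3, 3] intrinsics
P_src = torch.eye(4).unsqueeze(0).repeat(B, 1, 1)    # [B, 4, 4] source extrinsics
P_tgt = torch.eye(4).unsqueeze(0).repeat(B, 1, 1)    # [B, 4, 4] target extrinsics
P_tgt[:, 0, 3] = 0.1                                 # shift the target camera along x

F = fundamental_from_KP(K, P_src, P_tgt)             # [B, 3, 3]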

fundamental_from_features(src_image_file, tgt_image_file)

Computes the fundamental matrix between two images using image features.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| src_image_file | str | Input file for the source image. | required |
| tgt_image_file | str | Input file for the target image. | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The fundamental matrix between the two image views. |

Source code in src/cvt/geometry.py
def fundamental_from_features(src_image_file: str, tgt_image_file: str) -> np.ndarray:
    """Computes the fundamental matrix between two images using image features.

    Parameters:
        src_image_file: Input file for the source image.
        tgt_image_file: Input file for the target image.

    Returns:
        The fundamental matrix between the two image views.
    """
    src_image = cv2.imread(src_image_file)
    tgt_image = cv2.imread(tgt_image_file)

    # compute matching features
    (src_points, tgt_points) = match_features(src_image, tgt_image)

    # Compute fundamental matrix
    F, mask = cv2.findFundamentalMat(src_points,tgt_points,cv2.FM_8POINT)

    return F
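
A minimal usage sketch (placeholder image paths; the 8-point estimate uses the best ORB matches found by match_features):

from cvt.geometry import fundamental_from_features

F = fundamental_from_features("view_000.png", "view_001.png")
print(F.shape)  # (3, 3)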

geometric_consistency_error(src_depth, src_cam, tgt_depth, tgt_cam)

Computes the geometric consistency error between a source and target depth map.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| src_depth | ndarray | Depth map for the source view. | required |
| src_cam | ndarray | Camera parameters for the source depth map viewpoint. | required |
| tgt_depth | ndarray | Depth map for the target view. | required |
| tgt_cam | ndarray | Camera parameters for the target depth map viewpoint. | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The per-pixel re-projection distance error and the projection map computed during re-projection. |

Source code in src/cvt/geometry.py
def geometric_consistency_error(src_depth: np.ndarray, src_cam: np.ndarray, tgt_depth: np.ndarray, tgt_cam: np.ndarray) -> np.ndarray:
    """Computes the geometric consistency error between a source and target depth map.

    Parameters:
        src_depth: Depth map for the source view.
        src_cam: Camera parameters for the source depth map viewpoint.
        tgt_depth: Depth map for the target view.
        tgt_cam: Camera parameters for the target depth map viewpoint.

    Returns:
        The per-pixel re-projection distance error and the projection map computed during re-projection.
    """
    height, width = src_depth.shape
    x_src, y_src = np.meshgrid(np.arange(0, width), np.arange(0, height))

    depth_reprojected, coords_reprojected, coords_tgt, projection_map = reproject(src_depth, src_cam, tgt_depth, tgt_cam)

    dist = np.sqrt((coords_reprojected[:,:,0] - x_src) ** 2 + (coords_reprojected[:,:,1] - y_src) ** 2)

    return dist, projection_map

geometric_consistency_mask(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P, pixel_th)

Computes the geometric consistency mask between a source and target depth map.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| src_depth | Depth map for the source view. | required |
| src_K | Intrinsic camera parameters for the source depth map viewpoint. | required |
| src_P | Extrinsic camera parameters for the source depth map viewpoint. | required |
| tgt_depth | Target depth map used for re-projection. | required |
| tgt_K | Intrinsic camera parameters for the target depth map viewpoint. | required |
| tgt_P | Extrinsic camera parameters for the target depth map viewpoint. | required |
| pixel_th | Pixel re-projection threshold to determine matching depth estimates. | required |

Returns:

The binary consistency mask encoding depth consensus between source and target depth maps.

Source code in src/cvt/geometry.py
def geometric_consistency_mask(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P, pixel_th):
    """Computes the geometric consistency mask between a source and target depth map.

    Parameters:
        src_depth: Depth map for the source view.
        src_K: Intrinsic camera parameters for the source depth map viewpoint.
        src_P: Extrinsic camera parameters for the source depth map viewpoint.
        tgt_depth: Target depth map used for re-projection.
        tgt_K: Intrinsic camera parameters for the target depth map viewpoint.
        tgt_P: Extrinsic camera parameters for the target depth map viewpoint.
        pixel_th: Pixel re-projection threshold to determine matching depth estimates.

    Returns:
        The binary consistency mask encoding depth consensus between source and target depth maps.
    """
    batch_size, c, height, width = src_depth.shape
    depth_reprojected, coords_reprojected, coords_tgt = reproject(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P)

    x_src, y_src = torch.meshgrid(torch.arange(0, width), torch.arange(0, height), indexing="xy")
    x_src = x_src.unsqueeze(0).repeat(batch_size, 1, 1).to(src_depth)
    y_src = y_src.unsqueeze(0).repeat(batch_size, 1, 1).to(src_depth)
    dist = torch.sqrt((coords_reprojected[:,:,:,0] - x_src) ** 2 + (coords_reprojected[:,:,:,1] - y_src) ** 2)

    mask = torch.where(dist < pixel_th, 1, 0)
    return mask
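
A small self-contained sketch with synthetic depth maps and placeholder cameras, showing the expected tensor shapes:

import torch
from cvt.geometry import geometric_consistency_mask

B, H, W = 1, 64, 80
src_depth = torch.full((B, 1, H, W), 2.0)
tgt_depth = torch.full((B, 1, H, W), 2.0)
K = torch.tensor([[100.0, 0.0, W / 2],
                  [0.0, 100.0, H / 2],
                  [0.0,   0.0,   1.0]]).unsqueeze(0)   # [B, 3, 3]
P_src = torch.eye(4).unsqueeze(0)                      # [B, 4, 4]
P_tgt = torch.eye(4).unsqueeze(0)
P_tgt[:, 0, 3] = 0.05                                  # small baseline along x

mask = geometric_consistency_mask(src_depth, K, P_src, tgt_depth, K, P_tgt, pixel_th=1.0)
print(mask.shape, mask.float().mean())                 # [B, H, W], fraction of consistent pixels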

homography(src_image_file, tgt_image_file)

Computes a homography transformation between two images using image features.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| src_image_file | str | Input file for the source image. | required |
| tgt_image_file | str | Input file for the target image. | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The homography matrix to warp the target image to the source image. |

Source code in src/cvt/geometry.py
def homography(src_image_file: str, tgt_image_file: str) -> np.ndarray:
    """Computes a homography transformation between two images using image features.

    Parameters:
        src_image_file: Input file for the source image.
        tgt_image_file: Input file for the target image.

    Returns:
        The homography matrix to warp the target image to the source image.
    """
    src_image = cv2.imread(src_image_file)
    tgt_image = cv2.imread(tgt_image_file)

    (height, width, _) = src_image.shape

    (src_points, tgt_points) = match_features(src_image, tgt_image)

    # compute homography
    H, mask = cv2.findHomography(tgt_points, src_points, method=cv2.RANSAC)

    return H
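
A short sketch that estimates the homography and warps the target image into the source frame (the image paths are placeholders):

import cv2
from cvt.geometry import homography

H = homography("view_000.png", "view_001.png")

src = cv2.imread("view_000.png")
tgt = cv2.imread("view_001.png")
warped_tgt = cv2.warpPerspective(tgt, H, (src.shape[1], src.shape[0]))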

homography_warp(cfg, features, intrinsics, extrinsics, hypotheses, group_channels, vwa_net=None, vis_weights=None, virtual=False)

Performs homography warping to create a Plane Sweeping Volume (PSV).

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| cfg | Configuration dictionary containing configuration parameters. | required |
| features | Feature maps to be warped into a PSV. | required |
| intrinsics | Intrinsics matrices for all views. | required |
| extrinsics | Extrinsics matrices for all views. | required |
| hypotheses | Depth hypotheses to use for homography warping. | required |
| group_channels | Feature channel sizes used in group-wise correlation (GWC). | required |
| vwa_net | Network used for visibility weighting. | None |
| vis_weights | Pre-computed visibility weights. | None |

Returns:

The Plane Sweeping Volume computed via feature matching cost, along with the list of per-view visibility weights.

Source code in src/cvt/geometry.py
def homography_warp(cfg, features, intrinsics, extrinsics, hypotheses, group_channels, vwa_net=None, vis_weights=None, virtual=False):
    """Performs homography warping to create a Plane Sweeping Volume (PSV).
    Parameters:
        cfg: Configuration dictionary containing configuration parameters.
        features: Feature maps to be warped into a PSV.
        intrinsics: Intrinsics matrices for all views.
        extrinsics: Extrinsics matrices for all views.
        hypotheses: Depth hypotheses to use for homography warping.
        group_channels: Feature channel sizes used in group-wise correlation (GWC).
        vwa_net: Network used for visibility weighting.
        vis_weights: Pre-computed visibility weights.

    Returns:
        The Plane Sweeping Volume computed via feature matching cost, along with the list of per-view visibility weights.
    """
    hypotheses = hypotheses.squeeze(1)
    _,planes,_,_ = hypotheses.shape
    batch_size, C, height, width = features[0].shape
    num_views = len(features)
    device = features[0].device

    if not virtual:
        ref_volume = features[0].unsqueeze(2).repeat(1,1,planes,1,1)

    vis_weight_list = []
    cost_volume = torch.zeros((batch_size,group_channels,planes,height,width), dtype=torch.float32, device=device)
    reweight_sum = torch.zeros((batch_size,1,planes,height,width), dtype=torch.float32, device=device)

    # build reference projection matrix
    ref_proj = torch.matmul(intrinsics[:,0], extrinsics[:,0,0:3])
    last = torch.tensor([[[0,0,0,1.0]]]).repeat(batch_size, 1, 1).cuda()
    ref_proj = torch.cat((ref_proj, last), 1)

    # build coordinates grid
    y, x = torch.meshgrid([torch.arange(0, height, dtype=torch.float32, device=device),
                        torch.arange(0, width, dtype=torch.float32, device=device)],
                        indexing='ij')
    y, x = y.contiguous(), x.contiguous()
    y, x = y.view(height * width), x.view(height * width)
    xyz = torch.stack((x, y, torch.ones_like(x)))
    xyz = torch.unsqueeze(xyz, 0).repeat(batch_size, 1, 1)

    for v in range(1,num_views):
        with torch.no_grad():
            # build source projection matrix
            src_proj = torch.matmul(intrinsics[:,v], extrinsics[:,v,0:3])
            src_proj = torch.cat((src_proj, last), 1)

            # compute full projection matrix between views
            proj = torch.matmul(src_proj, torch.inverse(ref_proj))
            rot = proj[:, :3, :3]
            trans = proj[:, :3, 3:4]

            # Build plane-sweeping coordinates grid between views
            rot_xyz = torch.matmul(rot, xyz)
            rot_depth_xyz = rot_xyz.unsqueeze(2).repeat(1, 1, planes, 1) * hypotheses.view(batch_size, 1, planes, height*width)
            proj_xyz = rot_depth_xyz + trans.view(batch_size, 3, 1, 1)
            proj_xy = proj_xyz[:, :2] / proj_xyz[:, 2:3]
            proj_x_normalized = proj_xy[:, 0] / ((width - 1) / 2) - 1
            proj_y_normalized = proj_xy[:, 1] / ((height - 1) / 2) - 1
            proj_xy = torch.stack((proj_x_normalized, proj_y_normalized), dim=3)
            grid = proj_xy
            grid = grid.type(torch.float32)

        src_feature = features[v]
        warped_features = F.grid_sample( src_feature,
                                        grid.view(batch_size, planes * height, width, 2), 
                                        mode='bilinear',
                                        padding_mode='zeros',
                                        align_corners=False)
        warped_features = warped_features.view(batch_size, C, planes, height, width)

        if virtual:
            if v==1:
                # use the first source view as the reference volume and continue
                # to the next source view to compute the inner product
                ref_volume = warped_features
                continue

        # compute Pairwise Plane-Sweeping Volume using GWC
        ppsv = groupwise_correlation(warped_features, ref_volume, group_channels)
        if vwa_net is not None:
            reweight = vwa_net(ppsv)
            vis_weight_list.append(reweight)
            reweight = reweight.unsqueeze(1)
            ppsv = reweight*ppsv
        elif vis_weights is not None:
            reweight = vis_weights[v-1]
            if reweight.shape[2] < ppsv.shape[3]:
                reweight = F.interpolate(reweight,scale_factor=2,mode='bilinear',align_corners=False)
            vis_weight_list.append(reweight)
            reweight = reweight.unsqueeze(2)
            ppsv = reweight*ppsv

        cost_volume = cost_volume + ppsv
        reweight_sum = reweight_sum + reweight

        if cfg["mode"]=="inference":
            del src_feature
            del ppsv
            del warped_features
            del reweight
            torch.cuda.empty_cache()

    cost_volume = cost_volume/(reweight_sum+1e-10)

    return cost_volume, vis_weight_list

homography_warp_var(cfg, features, ref_in, src_in, ref_ex, src_ex, depth_hypos)

Performs homography warping to create a Plane Sweeping Volume (PSV).

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| cfg | Configuration dictionary containing configuration parameters. | required |
| features | Feature maps to be warped into a PSV. | required |
| ref_in | Reference view intrinsics matrix. | required |
| src_in | Source view intrinsics matrices. | required |
| ref_ex | Reference view extrinsics matrix. | required |
| src_ex | Source view extrinsics matrices. | required |
| depth_hypos | Depth hypotheses to use for homography warping. | required |

Returns:

The Plane Sweeping Volume computed via feature matching cost.

Source code in src/cvt/geometry.py
def homography_warp_var(cfg, features, ref_in, src_in, ref_ex, src_ex, depth_hypos):
    """Performs homography warping to create a Plane Sweeping Volume (PSV).
    Parameters:
        cfg: Configuration dictionary containing configuration parameters.
        features: Feature maps to be warped into a PSV.
        ref_in: Reference view intrinsics matrix.
        src_in: Source view intrinsics matrices.
        ref_ex: Reference view extrinsics matrix.
        src_ex: Source view extrinsics matrices.
        depth_hypos: Depth hypotheses to use for homography warping.

    Returns:
        The Plane Sweeping Volume computed via feature matching cost.
    """
    depth_hypos = depth_hypos.squeeze(1)
    _,planes,_,_ = depth_hypos.shape

    B,fCH,H,W = features[0].shape
    num_depth = depth_hypos.shape[1]
    nSrc = len(features)-1

    vis_weight_list = []
    ref_volume = features[0].unsqueeze(2).repeat(1,1,num_depth,1,1)

    cost_volume = torch.zeros((nSrc+1,B,fCH,planes,H,W)).to(features[0])
    cost_volume[0] = ref_volume
    reweight_sum = None
    for src in range(nSrc):
        with torch.no_grad():
            with autocast(enabled=False):
                src_proj = torch.matmul(src_in[:,src,:,:],src_ex[:,src,0:3,:])
                ref_proj = torch.matmul(ref_in,ref_ex[:,0:3,:])
                last = torch.tensor([[[0,0,0,1.0]]]).repeat(len(src_in),1,1).cuda()
                src_proj = torch.cat((src_proj,last),1)
                ref_proj = torch.cat((ref_proj,last),1)

                proj = torch.matmul(src_proj, torch.inverse(ref_proj))
                rot = proj[:, :3, :3]  # [B,3,3]
                trans = proj[:, :3, 3:4]  # [B,3,1]

                y, x = torch.meshgrid([torch.arange(0, H, dtype=torch.float32, device=ref_volume.device),
                                    torch.arange(0, W, dtype=torch.float32, device=ref_volume.device)],
                                    indexing='ij')
                y, x = y.contiguous(), x.contiguous()
                y, x = y.view(H * W), x.view(H * W)
                xyz = torch.stack((x, y, torch.ones_like(x)))  # [3, H*W]
                xyz = torch.unsqueeze(xyz, 0).repeat(B, 1, 1)  # [B, 3, H*W]
                rot_xyz = torch.matmul(rot, xyz)  # [B, 3, H*W]

                rot_depth_xyz = rot_xyz.unsqueeze(2).repeat(1, 1, num_depth, 1) * depth_hypos.view(B, 1, num_depth,H*W)  # [B, 3, Ndepth, H*W]
                proj_xyz = rot_depth_xyz + trans.view(B, 3, 1, 1)  # [B, 3, Ndepth, H*W]
                proj_xy = proj_xyz[:, :2, :, :] / proj_xyz[:, 2:3, :, :]  # [B, 2, Ndepth, H*W]
                proj_x_normalized = proj_xy[:, 0, :, :] / ((W - 1) / 2) - 1
                proj_y_normalized = proj_xy[:, 1, :, :] / ((H - 1) / 2) - 1
                proj_xy = torch.stack((proj_x_normalized, proj_y_normalized), dim=3)  # [B, Ndepth, H*W, 2]
                grid = proj_xy

        grid = grid.type(ref_volume.dtype)
        warped_src_fea = F.grid_sample( features[src+1],
                                        grid.view(B, num_depth * H, W, 2), 
                                        mode='bilinear',
                                        padding_mode='zeros',
                                        align_corners=False)
        cost_volume[src+1] = warped_src_fea.view(B, fCH, num_depth, H, W)

        torch.cuda.empty_cache()

    cost_volume = torch.var(cost_volume, dim=0)
    B,C,D,H,W = cost_volume.shape

    return cost_volume

match_features(src_image, tgt_image, max_features=500)

Computes matching ORB features between a pair of images.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| src_image | ndarray | The source image to compute and match features. | required |
| tgt_image | ndarray | The target image to compute and match features. | required |
| max_features | int | The maximum number of features to retain. | 500 |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| src_points | ndarray | The set of matched point coordinates for the source image. |
| tgt_points | ndarray | The set of matched point coordinates for the target image. |

Source code in src/cvt/geometry.py
def match_features(src_image: np.ndarray, tgt_image: np.ndarray, max_features: int = 500) -> Tuple[np.ndarray, np.ndarray]:
    """Computer matching ORB features between a pair of images.

    Args:
        src_image: The source image to compute and match features.
        tgt_image: The target image to compute and match features.
        max_features: The maximum number of features to retain.

    Returns:
        src_points: The set of matched point coordinates for the source image.
        tgt_points: The set of matched point coordinates for the target image.
    """
    src_image = cv2.cvtColor(src_image,cv2.COLOR_BGR2GRAY)
    tgt_image = cv2.cvtColor(tgt_image, cv2.COLOR_BGR2GRAY)

    orb = cv2.ORB_create(max_features)

    src_keypoints, src_descriptors = orb.detectAndCompute(src_image,None)
    tgt_keypoints, tgt_descriptors = orb.detectAndCompute(tgt_image,None)

    # ORB produces binary descriptors, so match with Hamming distance
    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
    matches = list(matcher.match(src_descriptors, tgt_descriptors))
    matches.sort(key=lambda x: x.distance)

    # keep the coordinates of the (up to) 8 best matches
    src_points = []
    tgt_points = []
    for m in matches[:8]:
        src_points.append(src_keypoints[m.queryIdx].pt)
        tgt_points.append(tgt_keypoints[m.trainIdx].pt)
    src_points  = np.asarray(src_points)
    tgt_points = np.asarray(tgt_points)

    return (src_points, tgt_points)
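
A minimal usage sketch (placeholder image paths):

import cv2
from cvt.geometry import match_features

src = cv2.imread("view_000.png")
tgt = cv2.imread("view_001.png")
src_pts, tgt_pts = match_features(src, tgt, max_features=500)
print(src_pts.shape, tgt_pts.shape)   # up to the 8 best matches are kept, e.g. (8, 2) (8, 2)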

plane_coords(K, P, depth_hypos, H, W)

Computes plane corner coordinates for a set of depth hypotheses (batched PyTorch version).

Source code in src/cvt/geometry.py
def plane_coords(K, P, depth_hypos, H, W):
    """Batched PyTorch version
    """
    batch_size,_,_ = K.shape
    num_planes = depth_hypos.shape[0]

    xyz = torch.movedim(torch.tensor([[0,0,1], [W-1,0,1], [0,H-1,1], [W-1,H-1,1]], dtype=torch.float32), 0, 1).to(P)
    xyz = xyz.reshape(1,3,4).repeat(batch_size, 1, 1)
    if K.shape[1]==3:
        K_44 = torch.zeros((batch_size, 4, 4)).to(P)
        K_44[:,:3,:3] = K[:,:3,:3]
        K_44[:,3,3] = 1
        K = K_44
    proj = K @ P
    inv_proj = torch.linalg.inv(proj)

    planes = torch.zeros(num_planes, 3, 4).to(inv_proj)
    for p in range(num_planes):
        planes[p] = (inv_proj[0,:3,:3] @ xyz) * depth_hypos[p]
        planes[p] += inv_proj[0,:3,3:4]

    return planes

project_depth_map(depth, cam, mask=None)

Projects a depth map into a list of 3D points

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| depth | Tensor | Input depth map to project. | required |
| cam | Tensor | Camera parameters for input depth map. | required |
| mask | Tensor | Optional mask selecting which projected points to keep. | None |

Returns:

| Type | Description |
| --- | --- |
| Tensor | A float Tensor of 3D points corresponding to the projected depth values. |

Source code in src/cvt/geometry.py
def project_depth_map(depth: torch.Tensor, cam: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
    """Projects a depth map into a list of 3D points

    Parameters:
        depth: Input depth map to project.
        cam: Camera parameters for input depth map.

    Returns:
        A float Tensor of 3D points corresponding to the projected depth values.
    """
    if (depth.shape[1] == 1):
        depth = depth.squeeze(1)

    batch_size, height, width = depth.shape
    cam_shape = cam.shape

    # get camera extrinsics and intrinsics
    P = cam[:,0,:,:]
    K = cam[:,1,:,:]
    K[:,3,:] = torch.tensor([0,0,0,1])

    # construct back-projection from inverse matrices
    # separate into rotation and translation components
    bwd_projection = torch.matmul(torch.inverse(P), torch.inverse(K)).to(torch.float32)
    bwd_rotation = bwd_projection[:,:3,:3]
    bwd_translation = bwd_projection[:,:3,3:4]

    # build 2D homogeneous coordinates tensor: [B, 3, H*W]
    with torch.no_grad():
        row_span = torch.arange(0, height, dtype=torch.float32).cuda()
        col_span = torch.arange(0, width, dtype=torch.float32).cuda()
        r,c = torch.meshgrid(row_span, col_span, indexing="ij")
        r,c = r.contiguous(), c.contiguous()
        r,c = r.reshape(height*width), c.reshape(height*width)
        coords = torch.stack((c,r,torch.ones_like(c)))
        coords = torch.unsqueeze(coords, dim=0).repeat(batch_size, 1, 1)

    # compute 3D coordinates using the depth map: [B, H*W, 3]
    world_coords = torch.matmul(bwd_rotation, coords)
    depth = depth.reshape(batch_size, 1, -1)
    world_coords = world_coords * depth
    world_coords = world_coords + bwd_translation

    #TODO: make sure index select is differentiable
    #       (there is a backward function but need to find the code..)
    if mask is not None:
        # select only the points kept by the mask (assumes a 2D mask over the pixel grid)
        non_zero_inds = torch.nonzero(mask.reshape(-1), as_tuple=False).squeeze(1)
        world_coords = torch.index_select(world_coords, dim=2, index=non_zero_inds)
        world_coords = torch.movedim(world_coords, 1, 2)

    # reshape 3D coordinates back into 2D map: [B, H, W, 3]
    #   coords_map = world_coords.reshape(batch_size, height, width, 3)

    return world_coords
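
A small sketch with a synthetic depth map and a 2x4x4 camera tensor (extrinsics in cam[:, 0], intrinsics in cam[:, 1]); the intrinsics values are placeholders and a CUDA device is assumed because the routine builds its pixel grid with .cuda():

import torch
from cvt.geometry import project_depth_map

B, H, W = 1, 32, 40
depth = torch.full((B, 1, H, W), 3.0).cuda()

cam = torch.zeros((B, 2, 4, 4)).cuda()
cam[:, 0] = torch.eye(4)           # extrinsics
cam[:, 1, 0, 0] = 100.0            # fx
cam[:, 1, 1, 1] = 100.0            # fy
cam[:, 1, 0, 2] = W / 2            # cx
cam[:, 1, 1, 2] = H / 2            # cy
cam[:, 1, 2, 2] = 1.0

points = project_depth_map(depth, cam)   # [B, 3, H*W] 3D points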

project_renderer(renderer, K, P, width, height)

Projects the scene in an Open3D Offscreen Renderer to the 2D image plane.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| renderer | OffscreenRenderer | Geometric scene to be projected. | required |
| K | ndarray | Camera intrinsic parameters. | required |
| P | ndarray | Camera extrinsic parameters. | required |
| width | float | Desired image width. | required |
| height | float | Desired image height. | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The rendered image for the scene at the specified camera viewpoint. |

Source code in src/cvt/geometry.py
def project_renderer(renderer: o3d.visualization.rendering.OffscreenRenderer, K: np.ndarray, P: np.ndarray, width: float, height: float) -> np.ndarray:
    """Projects the scene in an Open3D Offscreen Renderer to the 2D image plane.

    Parameters:
        renderer: Geometric scene to be projected.
        K: Camera intrinsic parameters.
        P: Camera extrinsic parameters.
        width: Desired image width.
        height: Desired image height.

    Returns:
        The rendered image for the scene at the specified camera viewpoint.
    """
    # set up the renderer
    intrins = o3d.camera.PinholeCameraIntrinsic(width, height, K[0,0], K[1,1], K[0,2], K[1,2])
    renderer.setup_camera(intrins, P)

    # render image
    image = np.asarray(renderer.render_to_image())
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    return image

psv(cfg, images, intrinsics, extrinsics, depth_hypos)

Performs homography warping to create a Plane Sweeping Volume (PSV).

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| cfg | Configuration dictionary containing configuration parameters. | required |
| images | Image maps to be warped into a PSV. | required |
| intrinsics | Intrinsics matrices for all views. | required |
| extrinsics | Extrinsics matrices for all views. | required |
| depth_hypos | Depth hypotheses to use for homography warping. | required |

Returns:

The list of pairwise plane-sweeping volumes built by warping each source image toward the reference view.

Source code in src/cvt/geometry.py
def psv(cfg, images, intrinsics, extrinsics, depth_hypos):
    """Performs homography warping to create a Plane Sweeping Volume (PSV).
    Parameters:
        cfg: Configuration dictionary containing configuration parameters.
        images: image maps to be warped into a PSV.
        intrinsics: intrinsics matrices.
        extrinsics: extrinsics matrices.
        depth_hypos: Depth hypotheses to use for homography warping.

    Returns:
        The list of pairwise plane-sweeping volumes built by warping each source image toward the reference view.
    """
    depth_hypos = depth_hypos.squeeze(1)
    _,planes,_,_ = depth_hypos.shape
    B,views,C,H,W = images.shape

    pairwise_psv = []
    for v in range(1,views):
        with torch.no_grad():
            src_proj = torch.matmul(intrinsics[:,v], extrinsics[:,v,0:3])
            ref_proj = torch.matmul(intrinsics[:,0], extrinsics[:,0,0:3])
            last = torch.tensor([[[0,0,0,1.0]]]).repeat(B,1,1).to(images.device)
            src_proj = torch.cat((src_proj,last),1)
            ref_proj = torch.cat((ref_proj,last),1)

            proj = torch.matmul(src_proj, torch.inverse(ref_proj))
            rot = proj[:, :3, :3]  # [B,3,3]
            trans = proj[:, :3, 3:4]  # [B,3,1]

            y, x = torch.meshgrid([torch.arange(0, H, dtype=torch.float32, device=images.device),
                                torch.arange(0, W, dtype=torch.float32, device=images.device)],
                                indexing='ij')
            y, x = y.contiguous(), x.contiguous()
            y, x = y.view(H * W), x.view(H * W)
            xyz = torch.stack((x, y, torch.ones_like(x)))  # [3, H*W]
            xyz = torch.unsqueeze(xyz, 0).repeat(B, 1, 1)  # [B, 3, H*W]
            rot_xyz = torch.matmul(rot, xyz)  # [B, 3, H*W]

            rot_depth_xyz = rot_xyz.unsqueeze(2).repeat(1, 1, planes, 1) * depth_hypos.view(B, 1, planes, H*W)  # [B, 3, Ndepth, H*W]
            proj_xyz = rot_depth_xyz + trans.view(B, 3, 1, 1)  # [B, 3, Ndepth, H*W]
            proj_xy = proj_xyz[:, :2, :, :] / proj_xyz[:, 2:3, :, :]  # [B, 2, Ndepth, H*W]
            proj_x_normalized = proj_xy[:, 0, :, :] / ((W - 1) / 2) - 1
            proj_y_normalized = proj_xy[:, 1, :, :] / ((H - 1) / 2) - 1
            proj_xy = torch.stack((proj_x_normalized, proj_y_normalized), dim=3)  # [B, Ndepth, H*W, 2]
            grid = proj_xy
        grid = grid.type(images.dtype)

        warped_src_image = F.grid_sample( images[:,v],
                                        grid.view(B, planes * H, W, 2), 
                                        mode='bilinear',
                                        padding_mode='zeros',
                                        align_corners=False)
        pairwise_psv.append(warped_src_image.view(B, C, planes, H, W))

    return pairwise_psv
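
A self-contained sketch with random images and placeholder cameras, showing the expected shapes of the pairwise plane-sweeping volumes (an empty cfg dictionary is passed as a placeholder):

import torch
from cvt.geometry import psv

B, V, C, H, W, D = 1, 3, 3, 32, 40, 8
images = torch.rand(B, V, C, H, W)                       # reference view first, then source views
K = torch.tensor([[100.0, 0.0, W / 2],
                  [0.0, 100.0, H / 2],
                  [0.0,   0.0,   1.0]])
intrinsics = K.reshape(1, 1, 3, 3).repeat(B, V, 1, 1)    # [B, V, 3, 3]
extrinsics = torch.eye(4).reshape(1, 1, 4, 4).repeat(B, V, 1, 1)
extrinsics[:, 1, 0, 3] = 0.1                             # shift the source cameras slightly
extrinsics[:, 2, 0, 3] = -0.1
depth_hypos = torch.linspace(1.0, 5.0, D).reshape(1, 1, D, 1, 1).repeat(B, 1, 1, H, W)

pairwise = psv({}, images, intrinsics, extrinsics, depth_hypos)
print(len(pairwise), pairwise[0].shape)                  # V-1 volumes, each [B, C, D, H, W]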

render_custom_values(points, values, image_shape, cam)

Renders a point cloud into a 2D camera plane using custom values for each pixel.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| points | ndarray | List of 3D points to be rendered. | required |
| values | ndarray | List of values to be written in the rendered image. | required |
| image_shape | Tuple[int, int] | Desired shape (height, width) of the rendered image. | required |
| cam | ndarray | Camera parameters for the image viewpoint. | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The rendered image for the list of points using the specified corresponding values. |

Source code in src/cvt/geometry.py
def render_custom_values(points: np.ndarray, values: np.ndarray, image_shape: Tuple[int,int], cam: np.ndarray) -> np.ndarray:
    """Renders a point cloud into a 2D camera plane using custom values for each pixel.

    Parameters:
        points: List of 3D points to be rendered.
        values: List of values to be written in the rendered image.
        image_shape: Desired shape (height,width) of the rendered image.
        cam: Camera parameters for the image viewpoint.

    Returns:
        The rendered image for the list of points using the specified corresponding values.
    """
    points = points.tolist()
    values = list(values.astype(float))
    cam = cam.flatten().tolist()

    rendered_img = rd.render(list(image_shape), points, values, cam)

    return rendered_img

render_point_cloud(render, intrins, pose)

Renders a point cloud into a 2D image plane.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| render | Offscreen renderer containing the scene to be rendered. | required |
| intrins | Camera intrinsic parameters. | required |
| pose | Camera extrinsic parameters. | required |

Returns:

The rendered image and depth map for the scene at the specified camera viewpoint.

Source code in src/cvt/geometry.py
def render_point_cloud(render, intrins, pose):
    """Renders a point cloud into a 2D image plane.

    Parameters:
        render: Offscreen renderer containing the scene to be rendered.
        intrins: Camera intrinsic parameters.
        pose: Camera extrinsic parameters.

    Returns:
        The rendered image and depth map for the scene at the specified camera viewpoint.
    """
    render.setup_camera(intrins, pose)

    # render image
    image = np.asarray(render.render_to_image())
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    depth = np.asarray(render.render_to_depth_image(z_in_view_space=True))

    return image, depth

render_point_cloud_single(cloud, pose, K, width, height)

Renders a point cloud into a 2D image plane.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| cloud | PointCloud | Point cloud to be rendered. | required |
| pose | ndarray | Camera extrinsic parameters for the image plane. | required |
| K | ndarray | Camera intrinsic parameters for the image plane. | required |
| width | int | Desired width of the rendered image. | required |
| height | int | Desired height of the rendered image. | required |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The rendered image and depth map for the point cloud at the specified camera viewpoint. |

Source code in src/cvt/geometry.py
def render_point_cloud_single(cloud: o3d.geometry.PointCloud, pose: np.ndarray, K: np.ndarray, width: int, height: int) -> np.ndarray:
    """Renders a point cloud into a 2D image plane.

    Parameters:
        cloud: Point cloud to be rendered.
        pose: Camera extrinsic parameters for the image plane.
        K: Camera intrinsic parameters for the image plane.
        width: Desired width of the rendered image.
        height: Desired height of the rendered image.

    Returns:
        The rendered image and depth map for the point cloud at the specified camera viewpoint.
    """
    #   cmap = plt.get_cmap("hot_r")
    #   colors = cmap(dists)[:, :3]
    #   ply.colors = o3d.utility.Vector3dVector(colors)

    # set up the renderer
    render = o3d.visualization.rendering.OffscreenRenderer(width, height)
    mat = o3d.visualization.rendering.MaterialRecord()
    mat.shader = 'defaultUnlit'
    render.scene.add_geometry("cloud", cloud, mat)
    render.scene.set_background(np.asarray([0,0,0,1])) #r,g,b,a
    intrins = o3d.camera.PinholeCameraIntrinsic(width, height, K[0,0], K[1,1], K[0,2], K[1,2])
    render.setup_camera(intrins, pose)

    # render image
    image = np.asarray(render.render_to_image())
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    depth = np.asarray(render.render_to_depth_image(z_in_view_space=True))

    return image, depth
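
A usage sketch, assuming an environment where Open3D offscreen rendering is available (the point-cloud path and camera values are placeholders):

import numpy as np
import open3d as o3d
from cvt.geometry import render_point_cloud_single

cloud = o3d.io.read_point_cloud("fused_cloud.ply")     # placeholder point cloud
K = np.array([[525.0,   0.0, 320.0],
              [  0.0, 525.0, 240.0],
              [  0.0,   0.0,   1.0]])
pose = np.eye(4)                                       # world-to-camera extrinsics

image, depth = render_point_cloud_single(cloud, pose, K, width=640, height=480)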

reproject(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P)

Computes the re-projection depth values and pixel indices between two depth maps.

This function takes as input two depth maps: 'src_depth' and 'tgt_depth'. The source depth map is first projected into the target camera plane using the source depth values and the camera parameters for both views. Using the projected pixel coordinates in the target view, the target depths are then re-projected back into the source camera plane (again with the camera parameters for both views). The information produced by this process is often used to compute re-projection errors between two depth maps, or for similar operations.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| src_depth | Source depth map to be projected. | required |
| src_K | Intrinsic camera parameters for the source depth map viewpoint. | required |
| src_P | Extrinsic camera parameters for the source depth map viewpoint. | required |
| tgt_depth | Target depth map used for re-projection. | required |
| tgt_K | Intrinsic camera parameters for the target depth map viewpoint. | required |
| tgt_P | Extrinsic camera parameters for the target depth map viewpoint. | required |

Returns:

| Name | Description |
| --- | --- |
| depth_reprojected | The re-projected depth values for the source depth map. |
| coords_reprojected | The re-projection coordinates for the source view. |
| coords_tgt | The projected coordinates for the target view. |

Source code in src/cvt/geometry.py
def reproject(src_depth, src_K, src_P, tgt_depth, tgt_K, tgt_P):
    """Computes the re-projection depth values and pixel indices between two depth maps.

    This function takes as input two depth maps: 'src_depth' and 'tgt_depth'. The source
    depth map is first projected into the target camera plane using the source depth
    values and the camera parameters for both views. Using the projected pixel
    coordinates in the target view, the target depths are then re-projected back into
    the source camera plane (again with the camera parameters for both views). The
    information produced by this process is often used to compute re-projection
    errors between two depth maps, or for similar operations.

    Parameters:
        src_depth: Source depth map to be projected.
        src_K: Intrinsic camera parameters for the source depth map viewpoint.
        src_P: Extrinsic camera parameters for the source depth map viewpoint.
        tgt_depth: Target depth map used for re-projection.
        tgt_K: Intrinsic camera parameters for the target depth map viewpoint.
        tgt_P: Extrinsic camera parameters for the target depth map viewpoint.

    Returns:
        depth_reprojected: The re-projected depth values for the source depth map.
        coords_reprojected: The re-projection coordinates for the source view.
        coords_tgt: The projected coordinates for the target view.
    """
    batch_size, c, height, width = src_depth.shape

    # back-project ref depths to 3D
    x_src, y_src = torch.meshgrid(torch.arange(0, width), torch.arange(0, height), indexing="xy")
    x_src = x_src.reshape(-1).unsqueeze(0).repeat(batch_size, 1).to(src_depth)
    y_src = y_src.reshape(-1).unsqueeze(0).repeat(batch_size, 1).to(src_depth)
    homog = torch.stack((x_src, y_src, torch.ones_like(x_src)), dim=1)
    xyz_src = torch.matmul(torch.linalg.inv(src_K), homog * src_depth.reshape(batch_size, 1, -1))

    # transform 3D points from ref to src coords
    homog_3d = torch.concatenate((xyz_src, torch.ones_like(x_src).unsqueeze(1)), dim=1)
    xyz_tgt = torch.matmul(torch.matmul(tgt_P, torch.linalg.inv(src_P)), homog_3d)[:,:3]

    # project src 3D points into pixel coords
    K_xyz_tgt = torch.matmul(tgt_K, xyz_tgt)
    xy_tgt = K_xyz_tgt[:,:2] / K_xyz_tgt[:,2:3]
    x_tgt = xy_tgt[:,0].reshape(batch_size, height, width).to(torch.float32)
    y_tgt = xy_tgt[:,1].reshape(batch_size, height, width).to(torch.float32)
    coords_tgt = torch.stack((x_tgt, y_tgt), dim=-1) # B x H x W x 2

    # sample the depth values from the src map at each pixel coord
    x_normalized = ((x_tgt / (width-1)) * 2) - 1
    y_normalized = ((y_tgt / (height-1)) * 2) - 1
    grid = torch.stack((x_normalized, y_normalized), dim=-1) # B x H x W x 2
    sampled_depth_tgt = F.grid_sample(
                                    tgt_depth,
                                    grid,
                                    mode="bilinear",
                                    padding_mode="zeros",
                                    align_corners=False)

    # back-project src depths to 3D
    homog = torch.concatenate((xy_tgt, torch.ones_like(x_src).unsqueeze(1)), dim=1)
    xyz_tgt = torch.matmul(torch.linalg.inv(tgt_K), homog * sampled_depth_tgt.reshape(batch_size, 1, -1))

    # transform 3D points from src to ref coords
    homog_3d = torch.concatenate((xyz_tgt, torch.ones_like(x_src).unsqueeze(1)), dim=1)
    xyz_reprojected = torch.matmul(torch.matmul(src_P, torch.linalg.inv(tgt_P)), homog_3d)[:,:3]

    # extract reprojected depth values
    depth_reprojected = xyz_reprojected[:,2].reshape(batch_size, height, width).to(torch.float32)

    # project ref 3D points into pixel coords
    K_xyz_reprojected = torch.matmul(src_K, xyz_reprojected)
    xy_reprojected = K_xyz_reprojected[:,:2] / (K_xyz_reprojected[:,2:3] + 1e-7)
    x_reprojected = xy_reprojected[:,0].reshape(batch_size, height, width).to(torch.float32)
    y_reprojected = xy_reprojected[:,1].reshape(batch_size, height, width).to(torch.float32)

    coords_reprojected = torch.stack((x_reprojected, y_reprojected), dim=-1) # B x H x W x 2

    return depth_reprojected, coords_reprojected, coords_tgt
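
A small synthetic sketch (placeholder depths and cameras) illustrating the input and output shapes:

import torch
from cvt.geometry import reproject

B, H, W = 1, 48, 64
src_depth = torch.full((B, 1, H, W), 2.0)
tgt_depth = torch.full((B, 1, H, W), 2.0)
K = torch.tensor([[100.0, 0.0, W / 2],
                  [0.0, 100.0, H / 2],
                  [0.0,   0.0,   1.0]]).unsqueeze(0)
P_src = torch.eye(4).unsqueeze(0)
P_tgt = torch.eye(4).unsqueeze(0)
P_tgt[:, 0, 3] = 0.05

depth_reproj, coords_reproj, coords_tgt = reproject(src_depth, K, P_src, tgt_depth, K, P_tgt)
# depth_reproj: [B, H, W]; coords_reproj and coords_tgt: [B, H, W, 2]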

resolution_based_hypothesis(data, target_hypo, level, focal_length, min_hypo, max_hypo, delta_in=1)

Parameters:

Returns:

Source code in src/cvt/geometry.py
def resolution_based_hypothesis(data, target_hypo, level, focal_length, min_hypo, max_hypo, delta_in=1):
    """
    Parameters:

    Returns:
    """
    B, _, D, H, W = target_hypo.shape
    M = D  # assumed sample count per depth region; M is not defined in the original signature
    rand_match_offset = torch.rand(B,1,M,H,W).to(target_hypo)
    rand_near_offset = torch.rand(B,1,M,H,W).to(target_hypo)
    rand_far_offset = torch.rand(B,1,M,H,W).to(target_hypo)

    near, far = Z_from_disp(target_hypo, data["baseline"], focal_length, delta=delta_in)
    target_range = torch.abs(far - near).repeat(1,1,M,1,1)
    near_range = torch.abs(near - min_hypo).repeat(1,1,M,1,1)
    far_range = torch.abs(max_hypo - far).repeat(1,1,M,1,1)

    target_samples = (rand_match_offset * target_range) + near
    near_samples = (rand_near_offset * near_range) + min_hypo
    far_samples = (rand_far_offset * far_range) + far
    samples = torch.cat([target_samples,near_samples,far_samples], dim=1)
    samples = samples.reshape(B,-1,H,W).unsqueeze(1) # [B, 1, M*3, H, W]

    mask = torch.where(target_hypo <= 0, 0.0, 1.0).repeat(1,1,M*3,1,1)
    hypos = torch.clip(samples, min_hypo, max_hypo) * mask

    return hypos

sample_volume(volume, z_vals, coords, H, W, near_depth, far_depth, inv_depth)

Parameters:

Returns:

Source code in src/cvt/geometry.py
def sample_volume(volume, z_vals, coords, H, W, near_depth, far_depth, inv_depth):
    """
    Parameters:

    Returns:
    """
    N, M = z_vals.shape
    batch_size, c, _, _, _ = volume.shape

    z_vals = z_vals.reshape(N,M,1) # N x M x 1
    if inv_depth:
        z_vals = 1/z_vals
        near_depth = 1/near_depth
        far_depth = 1/far_depth
    coords = coords.reshape(N,1,2).repeat(1,M,1) # N x M x 2
    x_coords = coords[:,:,1:2]
    y_coords = coords[:,:,0:1]
    points = torch.cat([x_coords, y_coords, z_vals], dim=-1) # N x M x 3
    points = torch.reshape(points, [-1, 3]) # N*M x 3

    # define coordinates bounds
    min_coord = torch.tensor([0,0,near_depth]).to(points)
    max_coord = torch.tensor([W-1,H-1,far_depth]).to(points)
    min_coord = min_coord.reshape(1,3).repeat(N*M,1)
    max_coord = max_coord.reshape(1,3).repeat(N*M,1) 

    # normalize points
    norm_points = (points - min_coord) / (max_coord - min_coord)
    norm_points = norm_points.unsqueeze(0).repeat(batch_size, 1, 1)
    norm_points = (norm_points * 2) - 1

    # Note: The shape of the input volume is 5D: Batch x Channels x Depth x Height x Width.
    #       The input coordinates must be in [x, y, z] format where x->width, y->height, z->depth.
    #       These coordinates must be normalized between [-1, 1].
    features = F.grid_sample(volume,
                            norm_points.view(batch_size, N*M, 1, 1, 3),
                            mode='bilinear',
                            padding_mode='zeros',
                            align_corners=True)
    features = torch.movedim(features.reshape(c, N*M), 0, 1) # N*M x c

    return features

soft_hypothesis(data, target_hypo, focal_length, min_hypo, max_hypo, M, delta_in=1)

Parameters:

Returns:

Source code in src/cvt/geometry.py
def soft_hypothesis(data, target_hypo, focal_length, min_hypo, max_hypo, M, delta_in=1):
    """
    Parameters:

    Returns:
    """
    B, _, D, H, W = target_hypo.shape
    rand_match_offset = torch.rand(B,1,M,H,W).to(target_hypo)
    near, far = Z_from_disp(target_hypo, data["baseline"], focal_length, delta=delta_in)
    target_range = torch.abs(far - near).repeat(1,1,M,1,1)

    target_samples = (rand_match_offset * target_range) + near
    mask = torch.where(target_hypo <= 0, 0.0, 1.0).repeat(1,1,M,1,1)
    matching_hypos = torch.clip(target_samples, min_hypo, max_hypo) * mask

    return matching_hypos

uniform_hypothesis(cfg, device, batch_size, depth_min, depth_max, img_height, img_width, planes, inv_depth=False, bin_format=False)

Parameters:

Returns:

Source code in src/cvt/geometry.py
def uniform_hypothesis(cfg, device, batch_size, depth_min, depth_max, img_height, img_width, planes, inv_depth=False, bin_format=False):
    """
    Parameters:

    Returns:
    """
    depth_range = depth_max-depth_min

    hypotheses = torch.zeros((batch_size,planes),device=device)
    for b in range(0,batch_size):
        if bin_format:
            spacing = depth_range/planes
            start_depth = depth_min + (spacing/2)
            end_depth = depth_min + (spacing/2) + ((planes-1)*spacing)
        else:
            start_depth = depth_min
            end_depth = depth_max
        if inv_depth:
            hypotheses[b] = 1/(torch.linspace(1/start_depth,1/end_depth,steps=planes,device=device))
        else:
            hypotheses[b] = torch.linspace(start_depth, end_depth, steps=planes,device=device)
    hypotheses = hypotheses.unsqueeze(2).unsqueeze(3).repeat(1,1,img_height,img_width)

    # Make coordinate for depth hypothesis, to be used by sparse convolution.
    depth_hypo_coords = torch.zeros((batch_size,planes),device=device)
    for b in range(0,batch_size):
        depth_hypo_coords[b] = torch.linspace(0,planes-1,steps=planes,device=device)
    depth_hypo_coords = depth_hypo_coords.unsqueeze(2).unsqueeze(3).repeat(1,1,img_height,img_width)

    # Calculate hypothesis interval
    hypo_intervals = hypotheses[:,1:]-hypotheses[:,:-1]
    hypo_intervals = torch.cat((hypo_intervals,hypo_intervals[:,-1].unsqueeze(1)),dim=1)

    return hypotheses.unsqueeze(1), depth_hypo_coords.unsqueeze(1), hypo_intervals.unsqueeze(1)
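
A quick sketch of generating evenly spaced hypotheses (the cfg dictionary is a placeholder):

import torch
from cvt.geometry import uniform_hypothesis

hypos, coords, intervals = uniform_hypothesis(
    cfg={}, device=torch.device("cpu"), batch_size=1,
    depth_min=1.0, depth_max=10.0, img_height=32, img_width=40, planes=8)
print(hypos.shape)   # [1, 1, 8, 32, 40]: evenly spaced depths repeated over the image grid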

visibility(depths, K, Ps, vis_th, levels=4)

Parameters:

Returns:

Source code in src/cvt/geometry.py
def visibility(depths, K, Ps, vis_th, levels=4):
    """
    Parameters:

    Returns:
    """
    batch_size, views, c, H, W = depths.shape

    K_pyr = intrinsic_pyramid(K, levels)

    vis_maps = []
    vis_masks = []
    for l in range(levels):
        resized_depths = tvf.resize(depths[:,:,0], [int(H/(2**l)), int(W/(2**l))]).unsqueeze(2)
        batch_size, views, c, height, width = resized_depths.shape
        vis_map = torch.where(resized_depths[:,0] > 0.0, 1, 0)

        for i in range(1, views):
            mask = geometric_consistency_mask(resized_depths[:,0], K_pyr[:,l], Ps[:,0], resized_depths[:,i], K_pyr[:,l], Ps[:,i], pixel_th=0.5)
            vis_map += mask.unsqueeze(1)
        vis_map = vis_map.to(torch.float32)

        vis_maps.append(vis_map)
        vis_masks.append(torch.where(vis_map >= vis_th, 1, 0))
    return vis_maps, vis_masks

visibility_mask(src_depth, src_cam, depth_files, cam_files, src_ind=-1, pixel_th=0.1)

Computes a visibility mask between a provided source depth map and list of target depth maps.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| src_depth | ndarray | Depth map for the source view. | required |
| src_cam | ndarray | Camera parameters for the source depth map viewpoint. | required |
| depth_files | List[str] | List of target depth maps. | required |
| cam_files | List[str] | List of corresponding target camera parameters for each target depth map viewpoint. | required |
| src_ind | int | Index into 'depth_files' corresponding to the source depth map (if included in the list). | -1 |
| pixel_th | float | Pixel re-projection threshold to determine matching depth estimates. | 0.1 |

Returns:

| Type | Description |
| --- | --- |
| ndarray | The visibility mask for the source view. |

Source code in src/cvt/geometry.py
def visibility_mask(src_depth: np.ndarray, src_cam: np.ndarray, depth_files: List[str], cam_files: List[str], src_ind: int = -1, pixel_th: float = 0.1) -> np.ndarray:
    """Computes a visibility mask between a provided source depth map and list of target depth maps.

    Parameters:
        src_depth: Depth map for the source view.
        src_cam: Camera parameters for the source depth map viewpoint.
        depth_files: List of target depth maps.
        cam_files: List of corresponding target camera parameters for each target depth map viewpoint.
        src_ind: Index into 'depth_files' corresponding to the source depth map (if included in the list).
        pixel_th: Pixel re-projection threshold to determine matching depth estimates.

    Returns:
        The visibility mask for the source view.
    """
    height, width = src_depth.shape
    vis_map = np.not_equal(src_depth, 0.0).astype(np.double)

    for i in range(len(depth_files)):
        if (i==src_ind):
            continue

        # get files
        sdf = depth_files[i]
        scf = cam_files[i]

        # load data
        tgt_depth = read_pfm(sdf)
        tgt_cam = read_single_cam_sfm(scf,'r')

        mask = geometric_consistency_mask(src_depth, src_cam, tgt_depth, tgt_cam, pixel_th)
        vis_map += mask

    return vis_map.astype(np.float32)