Skip to content

cvt.common

A suite of common utility functions.

This module includes general utility functions used by the sub-packages of the CVTkit library.

build_coords_list(H, W, batch_size, device)

Constructs a batched index list of pixel coordinates.

Parameters:

Name Type Description Default
H int

Height of the pixel grid.

required
W int

Width of the pixel grid.

required
batch_size int

Number of batches.

required
device str

GPU device identifier.

required

Returns:

Type Description
Tensor

The index list of shape [batch_size, H*W, 2]

Source code in src/cvt/common.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def build_coords_list(H: int, W: int, batch_size: int, device: str) -> torch.Tensor:
    """Constructs a batched index list of pixel coordinates.

    Coordinates are enumerated in row-major order, i.e.
    ``(0, 0), (0, 1), ..., (0, W-1), (1, 0), ...``, so every entry is a valid
    ``(row, column)`` index into an ``H x W`` grid.

    Parameters:
        H: Height of the pixel grid.
        W: Width of the pixel grid.
        batch_size: Number of batches.
        device: Identifier of the device the result is created on.

    Returns:
        The int64 index list of shape [batch_size, H*W, 2]; the last
        dimension holds (row, column).
    """
    # arange yields exactly 0..N-1. The previous linspace(0, N, N) included the
    # endpoint N, which is not a valid index into a grid of size N.
    rows = torch.arange(H, dtype=torch.int64, device=device)
    cols = torch.arange(W, dtype=torch.int64, device=device)

    # "ij" indexing keeps the first output varying along rows (matrix layout).
    grid_rows, grid_cols = torch.meshgrid(rows, cols, indexing="ij")
    coords = torch.stack([grid_rows, grid_cols], dim=-1)

    # Flatten the grid and replicate the same list for every batch element.
    return coords.reshape(1, -1, 2).repeat(batch_size, 1, 1)

cosine_similarity(t1, t2)

Computes the cosine similarity between two tensors.

Parameters:

Name Type Description Default
t1 Tensor

First tensor.

required
t2 Tensor

Second tensor.

required

Returns:

Type Description
Tensor

The cosine similarity between the two tensors.

Source code in src/cvt/common.py
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def cosine_similarity(t1: torch.Tensor, t2: torch.Tensor) -> torch.Tensor:
    """Computes the absolute cosine similarity between two tensors over dimension 1.

    Parameters:
        t1: First tensor, of shape [B, C, H, W] or [B, C, D, H, W].
        t2: Second tensor, with the same shape as ``t1``.

    Returns:
        The absolute value of the cosine similarity computed along dimension 1,
        with that dimension kept as size 1 ([B, 1, H, W] or [B, 1, D, H, W]).

    Raises:
        ValueError: If the two shapes differ or the tensors are not 4- or
            5-dimensional.
    """
    # Validate up front with real exceptions: asserts vanish under `python -O`,
    # and exiting the interpreter from a library helper hides the failure.
    if t1.shape != t2.shape:
        raise ValueError(
            f"Tensors must have identical shapes, got {tuple(t1.shape)} and {tuple(t2.shape)}"
        )
    if t1.dim() not in (4, 5):
        raise ValueError("Can only compute cosine similarity with 4 or 5 dimension tensors")

    # Reducing over dim 1 drops that dimension; unsqueeze restores it as size 1.
    return torch.abs(F.cosine_similarity(t1, t2, dim=1)).unsqueeze(1)

groupwise_correlation(t1, t2, num_groups)

Computes the Group-Wise Correlation (GWC) between two tensors.

Parameters:

Name Type Description Default
t1 Tensor

First tensor.

required
t2 Tensor

Second tensor.

required
num_groups int

Number of groups.

required

Returns:

Type Description
Tensor

The Group-Wise Correlation (GWC) between the two tensors.

Source code in src/cvt/common.py
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
def groupwise_correlation(t1: torch.Tensor, t2: torch.Tensor, num_groups: int) -> torch.Tensor:
    """Computes the Group-Wise Correlation (GWC) between two tensors.

    The channel dimension (dimension 1) of the element-wise product ``t1 * t2``
    is split into ``num_groups`` consecutive groups and averaged within each
    group.

    Parameters:
        t1: First tensor, of shape [B, C, H, W] or [B, C, D, H, W].
        t2: Second tensor, with the same shape as ``t1``.
        num_groups: Number of groups; must be positive and divide C evenly.

    Returns:
        The Group-Wise Correlation (GWC) between the two tensors, of shape
        [B, num_groups, H, W] or [B, num_groups, D, H, W].

    Raises:
        ValueError: If the shapes differ, the tensors are not 4- or
            5-dimensional, or ``num_groups`` does not evenly divide C.
    """
    # Real exceptions instead of assert / sys.exit(): asserts are stripped under
    # `python -O`, and a library helper should never terminate the interpreter.
    if t1.shape != t2.shape:
        raise ValueError(
            f"Tensors must have identical shapes, got {tuple(t1.shape)} and {tuple(t2.shape)}"
        )
    if t1.dim() not in (4, 5):
        raise ValueError("Can only compute GWC with 4 or 5 dimension tensors")

    batch, channels = t1.shape[:2]
    trailing = t1.shape[2:]  # (H, W) or (D, H, W)
    if num_groups <= 0 or channels % num_groups != 0:
        raise ValueError(
            f"num_groups ({num_groups}) must be positive and evenly divide the channel count ({channels})"
        )

    channels_per_group = channels // num_groups
    grouped = (t1 * t2).view(batch, num_groups, channels_per_group, *trailing)
    # Averaging over the per-group channel axis leaves one value per group.
    return grouped.mean(dim=2)

laplacian_pyramid(image)

Computes the Laplacian pyramid of an image.

Parameters:

Name Type Description Default
image Tensor

2D map to compute Laplacian over.

required
tau

Laplacian region threshold.

required

Returns:

Type Description
Tensor

The map of the Laplacian regions.

Source code in src/cvt/common.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
def laplacian_pyramid(image: torch.Tensor, levels: int = 4) -> torch.Tensor:
    """Computes the summed Laplacian-pyramid response of an image.

    The image is repeatedly halved with bilinear interpolation. For each pair of
    adjacent pyramid levels, the coarser level is upsampled to the finer one and
    the absolute difference is averaged over channels. Each of these difference
    maps is resized to the input resolution and all of them are summed.

    Parameters:
        image: Input map of shape [B, C, H, W].
        levels: Number of pyramid levels. H and W must each be at least
            ``2**levels`` so that no level collapses to size zero.

    Returns:
        The summed difference map of shape [B, 1, H, W].

    Raises:
        ValueError: If ``levels`` is less than 1 or the image is too small for
            the requested number of levels.
    """
    if levels < 1:
        raise ValueError(f"levels must be at least 1, got {levels}")
    _, _, h, w = image.shape
    if min(h, w) < 2 ** levels:
        raise ValueError(
            f"Image of size {h}x{w} is too small for {levels} pyramid levels"
        )

    # build the downsampled pyramid (index 0 is the input, higher is coarser)
    pyr = [image]
    for _ in range(levels):
        pyr.append(F.interpolate(pyr[-1], scale_factor=0.5, mode="bilinear"))

    # compute laplacian pyramid (difference between adjacent pyramid levels)
    total = None
    for level in range(levels, 0, -1):
        finer = pyr[level - 1]
        # Upsample to the finer level's exact size rather than by a fixed factor
        # of 2: halving floors odd sizes, so doubling would not match them.
        upsampled = F.interpolate(pyr[level], size=tuple(finer.shape[-2:]), mode="bilinear")
        diff = torch.abs(upsampled - finer).mean(dim=1, keepdim=True)
        diff = F.interpolate(diff, size=(h, w), mode="bilinear")
        total = diff if total is None else total + diff

    return total

laplacian_pyramid_th(image, tau)

Computes the Laplacian pyramid of an image.

Parameters:

Name Type Description Default
image Tensor

2D map to compute Laplacian over.

required
tau float

Laplacian region threshold.

required

Returns:

Type Description
Tensor

The map of the Laplacian regions.

Source code in src/cvt/common.py
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def laplacian_pyramid_th(image: torch.Tensor, tau: float, levels: int = 4) -> torch.Tensor:
    """Labels each pixel with the finest Laplacian-pyramid level that exceeds a threshold.

    The image is repeatedly halved with bilinear interpolation. For each pair of
    adjacent pyramid levels, the coarser level is upsampled to the finer one and
    the absolute difference is averaged over channels and resized to the input
    resolution. Levels are visited from coarsest (region id 1) to finest (region
    id ``levels``); wherever a level's response is greater than ``tau`` its
    region id overwrites the label written by coarser levels.

    Parameters:
        image: Input map of shape [B, C, H, W].
        tau: Laplacian region threshold.
        levels: Number of pyramid levels. H and W must each be at least
            ``2**levels`` so that no level collapses to size zero.

    Returns:
        The integer map of the Laplacian regions, of shape [B, 1, H, W]; 0 marks
        pixels where no level exceeded ``tau``.

    Raises:
        ValueError: If ``levels`` is less than 1 or the image is too small for
            the requested number of levels.
    """
    if levels < 1:
        raise ValueError(f"levels must be at least 1, got {levels}")
    _, _, h, w = image.shape
    if min(h, w) < 2 ** levels:
        raise ValueError(
            f"Image of size {h}x{w} is too small for {levels} pyramid levels"
        )

    # build the downsampled pyramid (index 0 is the input, higher is coarser)
    pyr = [image]
    for _ in range(levels):
        pyr.append(F.interpolate(pyr[-1], scale_factor=0.5, mode="bilinear"))

    # compute laplacian pyramid (difference between adjacent pyramid levels)
    region_map = None
    for level in range(levels, 0, -1):
        region_id = levels - level + 1

        finer = pyr[level - 1]
        # Upsample to the finer level's exact size rather than by a fixed factor
        # of 2: halving floors odd sizes, so doubling would not match them.
        upsampled = F.interpolate(pyr[level], size=tuple(finer.shape[-2:]), mode="bilinear")
        diff = torch.abs(upsampled - finer).mean(dim=1, keepdim=True)
        diff = F.interpolate(diff, size=(h, w), mode="bilinear")
        exceeds = diff > tau

        # The coarsest level starts from an all-zero background; later (finer)
        # levels overwrite only where their own response exceeds tau.
        background = 0 if region_map is None else region_map
        region_map = torch.where(exceeds, region_id, background)

    return region_map

non_zero_std(maps, device, dim=1, keepdim=False)

Computes the standard deviation of all non-zero values in an input Tensor along the given dimension.

Parameters:

Name Type Description Default
maps Tensor
required
device str
required
keepdim bool
False

Returns:

Type Description
Tensor

The standard deviation of the non-zero elements of the input map.

Source code in src/cvt/common.py
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
def non_zero_std(maps: torch.Tensor, device: str, dim: int = 1, keepdim: bool = False) -> torch.Tensor:
    """Computes the standard deviation of all non-zero values in an input Tensor along the given dimension.

    Zero entries are treated as invalid: they are excluded from both the mean
    and the squared-deviation sum, and the divisor is the number of non-zero
    entries (population standard deviation). Positions with no non-zero entry
    along ``dim`` yield 0.

    Parameters:
        maps: Input tensor; zeros mark entries to be ignored.
        device: Device identifier the validity mask is placed on (expected to
            match the device of ``maps``).
        dim: Dimension along which the statistic is computed.
        keepdim: Whether the reduced dimension is kept (as size 1) in the output.

    Returns:
        The standard deviation of the non-zero elements of the input map.
    """
    valid_map = torch.ne(maps, 0.0).to(torch.float32).to(device)

    # The small epsilon avoids a division by zero where every entry is invalid.
    valid_count = torch.sum(valid_map, dim=dim, keepdim=True) + 1e-7

    # Mean over valid entries only; masking it keeps zero entries at a
    # deviation of exactly 0 so they do not contribute to the variance.
    mean = torch.div(torch.sum(maps, dim=dim, keepdim=True), valid_count)
    deviation = torch.sub(maps, torch.mul(valid_map, mean))

    variance = torch.div(torch.sum(torch.square(deviation), dim=dim, keepdim=True), valid_count)
    std = torch.sqrt(variance)

    # Reductions above keep the dimension so broadcasting works for any `dim`;
    # drop it here only if the caller asked for that.
    return std if keepdim else std.squeeze(dim)

parameters_count(net, name, do_print=True)

Parameters:

Returns:

Source code in src/cvt/common.py
240
241
242
243
244
245
246
247
248
249
250
251
def parameters_count(net, name, do_print=True):
    """Counts the trainable parameters of a network.

    Parameters:
        net: Model exposing ``parameters()`` (e.g. a ``torch.nn.Module``).
        name: Label used in the printed summary.
        do_print: If True, prints the count in millions of parameters.

    Returns:
        The total number of elements over all parameters that have
        ``requires_grad`` set, as a Python int.
    """
    # numel() gives an exact Python int per parameter; np.prod over a torch.Size
    # yields NumPy scalars (and a float for 0-dim parameters).
    params = sum(p.numel() for p in net.parameters() if p.requires_grad)
    if do_print:
        print(f"#params {name}: {(params/1e6):0.3f} M")
    return params

print_gpu_mem()

Prints the current unallocated memory of the GPU.

Source code in src/cvt/common.py
253
254
255
256
257
258
259
260
def print_gpu_mem() -> None:
    """Prints an estimate of the free memory on GPU device 0, in gigabytes.

    The estimate is the device's total memory minus the sum of the memory
    PyTorch currently reports as allocated and as reserved on device 0.
    """
    # NOTE(review): the reserved figure may already include the allocated
    # figure, in which case allocated memory is subtracted twice -- confirm
    # against the torch.cuda memory-management documentation.
    device_index = 0
    bytes_per_gb = 1024 * 1024 * 1024

    total_bytes = torch.cuda.get_device_properties(device_index).total_memory
    reserved_bytes = torch.cuda.memory_reserved(device_index)
    allocated_bytes = torch.cuda.memory_allocated(device_index)

    free_bytes = total_bytes - (allocated_bytes + reserved_bytes)
    print("Free: {:0.4f} GB".format(free_bytes / bytes_per_gb))

round_nearest(num, decimal=0)

Rounds a floating point number to the nearest decimal place.

Parameters:

Name Type Description Default
num float

Float to be rounded.

required
decimal int

Decimal place to round to.

0

Returns:

Type Description
int

The given number rounded to the nearest decimal place.

Examples:

>>> round_nearest(11.1)
11
>>> round_nearest(15.7)
16
>>> round_nearest(2.5)
3
>>> round_nearest(3.5)
4
>>> round_nearest(14.156, 1)
14.2
>>> round_nearest(15.156, 1)
15.2
>>> round_nearest(15.156, 2)
15.16
Source code in src/cvt/common.py
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
def round_nearest(num: float, decimal: int = 0) -> float:
    """Rounds a floating point number to the nearest decimal place, rounding exact halves up.

    ``np.round`` rounds exact halves to the nearest even value. To avoid that, a
    small positive offset (``10 ** (-len(str(num)) - 1)``, i.e. smaller than the
    last printed digit of ``num``) is added before rounding, which pushes
    halfway cases upward.

    Parameters:
        num: Float to be rounded.
        decimal: Decimal place to round to.

    Returns:
        The given number rounded to the nearest decimal place, as returned by
        ``np.round`` (a floating point value, also when ``decimal`` is 0).

    Examples:
        >>> float(round_nearest(11.1))
        11.0
        >>> float(round_nearest(15.7))
        16.0
        >>> float(round_nearest(2.5))
        3.0
        >>> float(round_nearest(3.5))
        4.0
        >>> float(round_nearest(14.156, 1))
        14.2
        >>> float(round_nearest(15.156, 1))
        15.2
        >>> float(round_nearest(15.156, 2))
        15.16
    """
    # Offset scaled by the printed length of `num` so it stays below the
    # number's own precision while still breaking ties upward.
    tie_breaker = 10 ** (-len(str(num)) - 1)
    return np.round(num + tie_breaker, decimal)

scale_camera(cam, scale=1.0)

Scales a camera intrinsic parameters.

Parameters:

Name Type Description Default
cam ndarray

Input camera to be scaled.

required
scale float

Scale factor.

1.0

Returns:

Type Description
ndarray

The scaled camera.

Source code in src/cvt/common.py
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
def scale_camera(cam: np.ndarray, scale: float = 1.0) -> np.ndarray:
    """Returns a copy of a camera with four entries of its second matrix scaled.

    Only entries (0, 0), (1, 1), (0, 2) and (1, 2) of ``cam[1]`` are multiplied
    by ``scale`` (presumably the focal lengths and principal point of the
    intrinsic matrix -- confirm against the camera layout used by callers).
    Everything else is copied unchanged, and the input is not modified.

    Parameters:
        cam: Input camera to be scaled.
        scale: Scale factor.

    Returns:
        The scaled camera.
    """
    scaled = np.copy(cam)
    # (row, column) positions inside cam[1] that depend on image resolution.
    for row, col in ((0, 0), (1, 1), (0, 2), (1, 2)):
        scaled[1][row][col] = cam[1][row][col] * scale
    return scaled

scale_image(image, scale=1.0, interpolation='linear')

Scales an input pixel grid.

Parameters:

Name Type Description Default
image ndarray

Input image to be scaled.

required
scale float

Scale factor.

1.0
interpolation str

Interpolation technique to be used.

'linear'

Returns:

Type Description
ndarray

The scaled image.

Source code in src/cvt/common.py
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
def scale_image(image: np.ndarray, scale: float = 1.0, interpolation: str = "linear") -> np.ndarray:
    """Scales an input pixel grid.

    Parameters:
        image: Input image to be scaled.
        scale: Scale factor applied to both axes.
        interpolation: Interpolation technique to be used; either ``"linear"``
            or ``"nearest"``.

    Returns:
        The scaled image.

    Raises:
        ValueError: If ``interpolation`` is not one of the supported names.
    """
    # Reject unknown modes explicitly; previously they fell through and the
    # function silently returned None.
    if interpolation not in ("linear", "nearest"):
        raise ValueError(
            f"Unsupported interpolation '{interpolation}'; expected 'linear' or 'nearest'"
        )

    flag = cv2.INTER_LINEAR if interpolation == "linear" else cv2.INTER_NEAREST
    return cv2.resize(image, None, fx=scale, fy=scale, interpolation=flag)

scale_mvs_data(depths, confs, cams, scale=1.0, interpolation='linear')

Scales input depth maps, confidence maps, and cameras.

Parameters:

Name Type Description Default
depths ndarray

Input depth maps to be scaled.

required
confs ndarray

Input confidence maps to be scaled.

required
cams ndarray

Input cameras to be scaled

required
scale float

Scale factor.

1.0
interpolation str

Interpolation technique.

'linear'

Returns:

Name Type Description
scaled_depths ndarray

The scaled depth maps.

scaled_confs ndarray

The scaled confidence maps.

cams ndarray

The scaled cameras.

Source code in src/cvt/common.py
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
def scale_mvs_data(depths: np.ndarray, confs: np.ndarray, cams: np.ndarray, scale: float = 1.0, interpolation: str = "linear") -> Tuple[np.ndarray,np.ndarray,np.ndarray]:
    """Scales per-view depth maps, confidence maps, and cameras by a common factor.

    Each view's depth and confidence map is resized with ``scale_image`` and its
    camera is rescaled with ``scale_camera``. Note that the cameras are written
    back into ``cams`` itself, so the caller's array is modified in place.

    Parameters:
        depths: Input depth maps to be scaled; must be 3-dimensional, with the
            view index as the first dimension.
        confs: Input confidence maps to be scaled, indexed by view like ``depths``.
        cams: Input cameras to be scaled, indexed by view (updated in place).
        scale: Scale factor.
        interpolation: Interpolation technique passed to ``scale_image``.

    Returns:
        scaled_depths: The scaled depth maps.
        scaled_confs: The scaled confidence maps.
        cams: The scaled cameras (the same object that was passed in).
    """
    # Unpacking all three dimensions also enforces that depths is 3-D.
    num_views, _, _ = depths.shape
    resize_kwargs = {"scale": scale, "interpolation": interpolation}

    depth_out, conf_out = [], []
    for idx in range(num_views):
        depth_out.append(scale_image(depths[idx], **resize_kwargs))
        conf_out.append(scale_image(confs[idx], **resize_kwargs))
        # Overwrites the caller's camera for this view.
        cams[idx] = scale_camera(cams[idx], scale=scale)

    return np.asarray(depth_out), np.asarray(conf_out), cams

set_random_seed(seed)

Parameters:

Returns:

Source code in src/cvt/common.py
354
355
356
357
358
359
360
361
362
363
def set_random_seed(seed):
    """Seeds the Python, NumPy, and PyTorch random number generators.

    Parameters:
        seed: Seed value passed to ``random.seed``, ``np.random.seed`` and
            ``torch.manual_seed``.

    Returns:
        None.
    """
    # Same seed for every generator, applied in a fixed order.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

to_gpu(data, device)

Loads a dictionary of elements onto the GPU device.

Parameters:

Name Type Description Default
data dict

Dictionary to be loaded.

required
device str

GPU device identifier.

required
Source code in src/cvt/common.py
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
def to_gpu(data: dict, device: str) -> None:
    """Moves the tensors held in a dictionary onto a GPU device, in place.

    Tensor values at the top level, and tensor values inside dictionaries that
    are themselves top-level values, are replaced by their ``.cuda(...)`` copies
    (requested with ``non_blocking=True``). Deeper nesting and all other value
    types are left untouched.

    Parameters:
        data: Dictionary to be loaded; modified in place.
        device: GPU device identifier.
    """
    for key, val in data.items():
        if isinstance(val, dict):
            # One level of nesting only: update the inner dictionary's tensors.
            for inner_key, inner_val in val.items():
                if isinstance(inner_val, torch.Tensor):
                    val[inner_key] = inner_val.cuda(device, non_blocking=True)
        elif isinstance(val, torch.Tensor):
            # Re-assigning an existing key is safe while iterating items().
            data[key] = val.cuda(device, non_blocking=True)