Skip to content

cvt.io

Pose File Formats

Please see the resources page on pose file formats to learn more about the expected/supported formats commonly used in this library.

A suite of common input/output functions.

This module includes several functions for reading and writing different types of data useful for computer vision applications.

load_pretrained_model(model, ckpt)

Loads model weights from disk.

Source code in src/cvt/io.py
432
433
434
435
436
437
438
439
440
441
def load_pretrained_model(model, ckpt):
    """Loads model weights from disk.

    Parameters:
        model: Network object whose `load_state_dict` receives the loaded weights.
        ckpt: Path of the checkpoint file handed to `torch.load`.

    Raises:
        SystemExit: If the checkpoint cannot be read or applied to the model.
            The process exits with status 1 so the failure is visible to the
            calling shell or job scheduler.
    """
    print(f"Loading model from: {ckpt}...")
    try:
        state_dict = torch.load(ckpt)
        model.load_state_dict(state_dict)
    except Exception as e:
        print(e)
        print("Failed loading network weights...")
        # A bare sys.exit() reports success (status 0); signal the failure.
        sys.exit(1)

read_cams_sfm(camera_path, extension='cam.txt')

Reads an entire directory of camera files in SFM format.

Parameters:

Name Type Description Default
camera_path str

Path to the directory of camera files.

required
extension str

File extension being used for the camera files.

'cam.txt'

Returns:

Type Description
ndarray

Array of camera extrinsics, intrinsics, and view metadata (Nx2x4x4).

Source code in src/cvt/io.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def read_cams_sfm(camera_path: str, extension: str = "cam.txt") -> np.ndarray:
    """Reads an entire directory of camera files in SFM format.

    Directory entries are visited in sorted name order. Every entry whose name
    ends with `extension` is parsed with `read_single_cam_sfm` using 256 depth
    planes; all other entries are skipped.

    Parameters:
        camera_path: Path to the directory of camera files.
        extension: File extension being used for the camera files.

    Returns:
        Array of camera extrinsics, intrinsics, and view metadata (Nx2x4x4).
    """
    cams = []

    for cf in sorted(os.listdir(camera_path)):
        # Compare the actual suffix: slicing a fixed 7 characters only ever
        # matched extensions that are exactly as long as "cam.txt".
        if not cf.endswith(extension):
            continue

        cam_path = os.path.join(camera_path, cf)
        cams.append(read_single_cam_sfm(cam_path, 256))

    return np.asarray(cams)

read_cams_trajectory(log_file)

Reads camera file in Trajectory File format.

Parameters:

Name Type Description Default
log_file str

Input *.log file to be read.

required

Returns:

Type Description
ndarray

Inverted 4x4 camera matrices, one per record in the file (Nx4x4).

Source code in src/cvt/io.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
def read_cams_trajectory(log_file: str) -> np.ndarray:
    """Reads camera file in Trajectory File format.

    The file is consumed in records of five lines: the first line of each
    record is skipped and the following four lines are parsed as the rows of a
    4x4 matrix. Each matrix is inverted before being stored. Blank lines are
    ignored so a trailing empty line does not break parsing.

    Parameters:
        log_file: Input *.log file to be read.

    Returns:
        Array of inverted 4x4 camera matrices, one per record (Nx4x4).
    """
    with open(log_file, 'r') as f:
        lines = [line for line in f if line.strip()]

    cams = []
    for i in range(0, len(lines), 5):
        cam = np.zeros((4, 4))
        # lines[i] is the record's header line; rows follow at i+1 .. i+4.
        for j in range(1, 5):
            cam[j - 1, :] = [float(v) for v in lines[i + j].split()]
        cams.append(np.linalg.inv(cam))

    # Return an ndarray as annotated; reshape keeps Nx4x4 even when N == 0.
    return np.asarray(cams, dtype=float).reshape(-1, 4, 4)

read_cluster_list(filename)

Reads a cluster list file encoding supporting camera viewpoints.

Parameters:

Name Type Description Default
filename str

Input file encoding per-camera viewpoints.

required

Returns:

Type Description
List[Tuple[int, List[int]]]

A list of tuples encoding (ref_view, [src_1, src_2, ...]).

Source code in src/cvt/io.py
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
def read_cluster_list(filename: str) -> List[Tuple[int,List[int]]]:
    """Parses a cluster list file into (reference view, source views) pairs.

    The first line gives the number of entries. Each entry occupies two lines:
    the reference view index, then a line of whitespace-separated tokens from
    which every second token, starting at position 1, is read as a source view
    index. Entries that yield no source views are left out of the result.

    Parameters:
        filename: Input file encoding per-camera viewpoints.

    Returns:
        A list of tuples encoding (ref_view, [src_1,src_2,..])
    """
    clusters = []
    with open(filename) as cluster_file:
        entry_count = int(cluster_file.readline())

        for _ in range(entry_count):
            reference = int(cluster_file.readline().rstrip())
            tokens = cluster_file.readline().rstrip().split()
            # Positions 1, 3, 5, ... hold the source view indices.
            sources = [int(token) for token in tokens[1::2]]
            if sources:
                clusters.append((reference, sources))
    return clusters

read_extrinsics_tum(tum_file, key_frames=None)

Reads extrinsic camera trajectories in TUM format [timestamp tx ty tz qx qy qz qw].

Parameters:

Name Type Description Default
tum_file str

Input extrinsics file.

required
key_frames List[int]

Indices corresponding to the desired keyframes.

None

Returns:

Type Description
ndarray

Array of camera extrinsics (Nx4x4).

Source code in src/cvt/io.py
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def read_extrinsics_tum(tum_file: str, key_frames: List[int] = None) -> np.ndarray:
    """Reads extrinsic camera trajectories in TUM format [timestamp tx ty tz qx qy qz qw].

    Every pose line is converted into a 4x4 matrix [R^T | -R^T t], where R is
    the rotation built from the quaternion and t the translation on that line.
    Blank lines and lines starting with '#' are skipped, and fields may be
    separated by any run of whitespace.

    After each key frame an additional sweep of poses is appended, produced by
    `y_axis_rotation` for angles running 0 -> +60 deg -> -60 deg -> 0
    (30 + 60 + 30 samples). When `key_frames` is None every pose is treated as
    a key frame.

    Parameters:
        tum_file: Input extrinsics file.
        key_frames: Indices corresponding to the desired keyframes. Indices
            count pose lines, starting at 0.

    Returns:
        Array of camera extrinsics (Nx4x4).
    """
    rot_interval = 30
    max_rot_angle = math.pi / 3

    # The sweep angles are the same for every key frame, so build them once.
    thetas = np.concatenate((
        np.linspace(0.0, max_rot_angle, rot_interval),
        np.linspace(max_rot_angle, -max_rot_angle, rot_interval * 2),
        np.linspace(-max_rot_angle, 0.0, rot_interval),
    ))

    extrinsics = []
    frame_idx = 0
    with open(tum_file, "r") as tf:
        for line in tf:
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            # split() without arguments tolerates tabs and repeated spaces.
            values = np.asarray(line.split(), dtype=float)
            t = values[1:4]
            q = values[4:]

            R = rot.from_quat(q).as_matrix().transpose()
            P = np.eye(4)
            P[:3, :3] = R
            P[:3, 3] = -R @ t

            extrinsics.append(P)

            # `is None` avoids the element-wise comparison that `== None`
            # triggers when key_frames is an array.
            if key_frames is None or frame_idx in key_frames:
                for theta in thetas:
                    extrinsics.append(y_axis_rotation(P, theta))

            frame_idx += 1

    return np.asarray(extrinsics)

read_matrix(mat_file)

Reads a single matrix of float values from a file.

Parameters:

Name Type Description Default
mat_file str

Input file for the matrix to be read.

required

Returns:

Type Description
ndarray

The matrix stored in the given file.

Source code in src/cvt/io.py
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
def read_matrix(mat_file: str) -> np.ndarray:
    """Loads a whitespace-separated matrix of floats from a text file.

    Each line of the file becomes one row; every whitespace-separated token on
    the line is converted with `float`.

    Parameters:
        mat_file: Input file for the matrix to be read.

    Returns:
        The matrix stored in the given file.
    """
    with open(mat_file, 'r') as handle:
        rows = [
            [float(token) for token in text_line.split()]
            for text_line in handle.readlines()
        ]

    return np.array(rows)

read_mesh(mesh_file)

Reads a mesh from a file.

Parameters:

Name Type Description Default
mesh_file str

Input mesh file.

required

Returns:

Type Description
TriangleMesh

The mesh stored in the given file.

Source code in src/cvt/io.py
141
142
143
144
145
146
147
148
149
150
def read_mesh(mesh_file: str) -> o3d.geometry.TriangleMesh:
    """Loads a triangle mesh from disk.

    Thin wrapper that forwards the path to `o3d.io.read_triangle_mesh`.

    Parameters:
        mesh_file: Input mesh file.

    Returns:
        The mesh stored in the given file.
    """
    mesh = o3d.io.read_triangle_mesh(mesh_file)
    return mesh

read_pfm(pfm_file)

Reads a file in *.pfm format.

Parameters:

Name Type Description Default
pfm_file str

Input *.pfm file to be read.

required

Returns:

Type Description
ndarray

Data map that was stored in the *.pfm file.

Source code in src/cvt/io.py
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
def read_pfm(pfm_file: str) -> np.ndarray:
    """Reads a file in *.pfm format.

    The header consists of three text lines: the type tag ('PF' for
    three-channel data, 'Pf' for single-channel data), the width and height,
    and a scale value whose sign selects the byte order of the payload
    (negative: little-endian, otherwise big-endian). The payload is read as
    32-bit floats, reshaped to (height, width[, 3]) and flipped vertically.

    Parameters:
        pfm_file: Input *.pfm file to be read.

    Returns:
        Data map that was stored in the *.pfm file, as a new native-endian
        float32 array.

    Raises:
        Exception: If the type tag is not 'PF'/'Pf' or the dimension line is
            malformed.
    """
    with open(pfm_file, 'rb') as f:
        header = f.readline().decode('iso8859_15').rstrip()

        if header == 'PF':
            color = True
        elif header == 'Pf':
            color = False
        else:
            raise Exception('Not a PFM file.')

        dim_match = re.match(r'^(\d+)\s(\d+)\s$', f.readline().decode('iso8859_15'))
        if not dim_match:
            raise Exception('Malformed PFM header.')
        width, height = map(int, dim_match.groups())

        scale = float(f.readline().decode('iso8859_15').rstrip())
        data_type = '<f' if scale < 0 else '>f'

        raw = f.read()

    # np.fromstring's binary mode is deprecated/removed; frombuffer is the
    # supported way to reinterpret raw bytes.
    data = np.frombuffer(raw, dtype=data_type)
    shape = (height, width, 3) if color else (height, width)
    data = np.reshape(data, shape)

    # Flip the rows and copy: frombuffer yields a read-only view of `raw`,
    # and the copy also converts big-endian payloads to native float32.
    return np.array(data[::-1], dtype=np.float32, order='C')

read_point_cloud(point_cloud_file)

Reads a point cloud from a file.

Parameters:

Name Type Description Default
point_cloud_file str

Input point cloud file.

required

Returns:

Type Description
PointCloud

The point cloud stored in the given file.

Source code in src/cvt/io.py
215
216
217
218
219
220
221
222
223
224
def read_point_cloud(point_cloud_file: str) -> o3d.geometry.PointCloud:
    """Loads a point cloud from disk.

    Thin wrapper that forwards the path to `o3d.io.read_point_cloud`.

    Parameters:
        point_cloud_file: Input point cloud file.

    Returns:
        The point cloud stored in the given file.
    """
    cloud = o3d.io.read_point_cloud(point_cloud_file)
    return cloud

read_single_cam_sfm(cam_file, depth_planes=256)

Reads a single camera file in SFM format.

Parameters:

Name Type Description Default
cam_file str

Input camera file to be read.

required
depth_planes int

Number of depth planes to store in the view metadata.

256

Returns:

Type Description
ndarray

Camera extrinsics, intrinsics, and view metadata (2x4x4).

Source code in src/cvt/io.py
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
def read_single_cam_sfm(cam_file: str, depth_planes: int = 256) -> np.ndarray:
    """Reads a single camera file in SFM format.

    The file is split into whitespace-separated tokens. Token 0 and token 17
    are skipped (section labels); tokens 1-16 fill the 4x4 extrinsic matrix and
    tokens 18-26 fill the upper-left 3x3 of the intrinsic slot, both row by
    row. Any tokens after that describe the depth range and are stored in the
    last row of the intrinsic slot as [first, second, plane count, last]:

      * 2 extra tokens: plane count is `depth_planes`; last = first + second * planes.
      * 3 extra tokens: plane count is read from the file; last is computed as above.
      * 4 extra tokens: all four values are read from the file.
      * anything else:  the row is set to [0, 0, 0, 1].

    Parameters:
        cam_file: Input camera file to be read.
        depth_planes: Number of depth planes to store in the view metadata.

    Returns:
        Camera extrinsics, intrinsics, and view metadata (2x4x4).

    Raises:
        ValueError: If the file holds fewer than the 27 tokens needed for the
            two matrices, or a token is not a valid number.
    """
    with open(cam_file, 'r') as f:
        words = f.read().split()

    if len(words) < 27:
        raise ValueError(
            f"Camera file '{cam_file}' is truncated: expected at least 27 "
            f"tokens, found {len(words)}."
        )

    cam = np.zeros((2, 4, 4))

    # words[0] is the extrinsic label; the next 16 tokens are the matrix.
    cam[0] = np.reshape([float(w) for w in words[1:17]], (4, 4))

    # words[17] is the intrinsic label; the next 9 tokens are the matrix.
    cam[1, :3, :3] = np.reshape([float(w) for w in words[18:27]], (3, 3))

    extra = words[27:]
    if len(extra) == 2:
        first, second = (float(w) for w in extra)
        planes = float(depth_planes)
        cam[1, 3] = [first, second, planes, first + second * planes]
    elif len(extra) == 3:
        first, second, planes = (float(w) for w in extra)
        cam[1, 3] = [first, second, planes, first + second * planes]
    elif len(extra) == 4:
        cam[1, 3] = [float(w) for w in extra]
    else:
        cam[1, 3] = [0, 0, 0, 1]

    return cam

read_stereo_intrinsics_yaml(intrinsics_file)

Reads intrinsics information for a stereo camera pair from a *.yaml file.

Parameters:

Name Type Description Default
intrinsics_file str

Input *.yaml file storing the intrinsics information.

required

Returns:

Name Type Description
K_left ndarray

Intrinsics matrix (3x3) of left camera.

D_left ndarray

Distortion coefficients vector (1x4) of left camera.

K_right ndarray

Intrinsics matrix (3x3) of right camera.

D_right ndarray

Distortion coefficients vector (1x4) of right camera.

R ndarray

Relative rotation matrix (3x3) from left -> right cameras.

T ndarray

Relative translation vector (1x3) from left -> right cameras.

Source code in src/cvt/io.py
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
def read_stereo_intrinsics_yaml(intrinsics_file: str) -> Tuple[
        np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Loads stereo calibration data from a *.yaml file via cv2.FileStorage.

    The file is expected to hold top-level nodes "left" and "right", each with
    child nodes "K" and "D", plus top-level nodes "R" and "T". Each node is
    converted with `.mat()`.

    Parameters:
        intrinsics_file: Input *.yaml file storing the intrinsics information.

    Returns:
        K_left: Intrinsics matrix (3x3) of left camera.
        D_left: Distortion coefficients vector (1x4) of left camera.
        K_right: Intrinsics matrix (3x3) of right camera.
        D_right: Distortion coefficients vector (1x4) of right camera.
        R: Relative rotation matrix (3x3) from left -> right cameras.
        T: Relative translation vector (1x3) from left -> right cameras.

        The six values are returned together in a list, in the order above.
    """
    storage = cv2.FileStorage(intrinsics_file, cv2.FILE_STORAGE_READ)

    # Per-camera parameters live under the "left" / "right" nodes.
    left_node = storage.getNode("left")
    K_left = left_node.getNode("K").mat()
    D_left = left_node.getNode("D").mat()

    right_node = storage.getNode("right")
    K_right = right_node.getNode("K").mat()
    D_right = right_node.getNode("D").mat()

    # The relative pose between the cameras is stored at the top level.
    R = storage.getNode("R").mat()
    T = storage.getNode("T").mat()

    storage.release()

    return [K_left, D_left, K_right, D_right, R, T]

save_model(model, cfg, name='ckpt_model.pth')

Saves model weights to disk.

Source code in src/cvt/io.py
422
423
424
425
426
427
428
429
430
def save_model(model, cfg, name="ckpt_model.pth"):
    """Saves model weights to disk.

    The checkpoint directory is taken from `cfg["model"]["ckpt"]` and created
    if it does not exist yet.

    Parameters:
        model: Network object whose `state_dict()` is serialized with `torch.save`.
        cfg: Configuration mapping providing `cfg["model"]["ckpt"]`.
        name: File name of the checkpoint inside the checkpoint directory.
    """
    ckpt_path = cfg["model"]["ckpt"]
    os.makedirs(ckpt_path, exist_ok=True)
    print(f"Saving model checkpoint to {ckpt_path}...")
    # The original joined against an undefined `save_folder` (NameError).
    model_path = os.path.join(ckpt_path, name)
    torch.save(model.state_dict(), model_path)

write_cam_sfm(cam_file, intrinsics, extrinsics)

Writes intrinsic and extrinsic camera parameters to a file in sfm format.

Parameters:

Name Type Description Default
cam_file str

The file to be written to.

required
intrinsics ndarray

Camera intrinsic data to be written.

required
extrinsics ndarray

Camera extrinsic data to be written.

required
Source code in src/cvt/io.py
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
def write_cam_sfm(cam_file: str, intrinsics: np.ndarray, extrinsics: np.ndarray) -> None:
    """Stores camera parameters as a text file in sfm format.

    The output is an 'extrinsic' label followed by the first 4x4 entries of
    `extrinsics`, an empty line, then an 'intrinsic' label followed by the
    first 3x3 entries of `intrinsics`. Each value is followed by one space.

    Parameters:
        cam_file: The file to be written to.
        intrinsics: Camera intrinsic data to be written.
        extrinsics: Camera extrinsic data to be written.
    """
    with open(cam_file, "w") as out:
        out.write('extrinsic\n')
        for row in range(4):
            out.write(''.join(str(extrinsics[row][col]) + ' ' for col in range(4)))
            out.write('\n')
        # Blank separator line between the two sections.
        out.write('\n')

        out.write('intrinsic\n')
        for row in range(3):
            out.write(''.join(str(intrinsics[row][col]) + ' ' for col in range(3)))
            out.write('\n')

write_matrix(M, mat_file)

Writes a single matrix to a file.

Parameters:

Name Type Description Default
M ndarray

Matrix to be stored.

required
mat_file str

Output file where the given matrix is to be written.

required
Source code in src/cvt/io.py
343
344
345
346
347
348
349
350
351
352
353
354
def write_matrix(M: np.ndarray, mat_file: str) -> None:
    """Stores a matrix as plain text, one row per line.

    Every element is written with its default string formatting followed by a
    single space; each row is terminated by a newline.

    Parameters:
        M: Matrix to be stored.
        mat_file: Output file where the given matrix is to be written.
    """
    with open(mat_file, "w") as out:
        for values in M:
            out.write("".join(f"{value} " for value in values))
            out.write("\n")

write_mesh(mesh_file, mesh)

Writes a mesh to a file.

Parameters:

Name Type Description Default
mesh_file str

Output mesh file.

required
mesh TriangleMesh

Mesh to be stored.

required
Source code in src/cvt/io.py
356
357
358
359
360
361
362
363
def write_mesh(mesh_file: str, mesh: o3d.geometry.TriangleMesh) -> None:
    """Stores a triangle mesh on disk.

    Thin wrapper around `o3d.io.write_triangle_mesh`; whatever that call
    returns is passed back to the caller.

    Parameters:
        mesh_file: Output mesh file.
        mesh: Mesh to be stored.
    """
    result = o3d.io.write_triangle_mesh(mesh_file, mesh)
    return result

write_pfm(pfm_file, data_map, scale=1.0)

Writes a data map to a file in *.pfm format.

Parameters:

Name Type Description Default
pfm_file str

Output *.pfm file to store the data map.

required
data_map ndarray

Data map to be stored.

required
scale float

Value used to scale the data map.

1.0
Source code in src/cvt/io.py
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
def write_pfm(pfm_file: str, data_map: np.ndarray, scale: float = 1.0) -> None:
    """Writes a data map to a file in *.pfm format.

    The header consists of the type tag ('PF' for H x W x 3 data, 'Pf' for
    H x W or H x W x 1 data), the width and height, and the scale. The scale is
    negated when the data is little-endian, which is how the byte order of the
    payload is recorded. Rows are written flipped vertically.

    The input is validated before the output file is opened, so an invalid
    data map does not create or truncate `pfm_file`.

    Parameters:
        pfm_file: Output *.pfm file to store the data map.
        data_map: Data map to be stored.
        scale: Value used to scale the data map.

    Raises:
        Exception: If `data_map` is not float32 or does not have H x W x 3,
            H x W x 1 or H x W dimensions.
    """
    if data_map.dtype.name != 'float32':
        raise Exception('Image dtype must be float32.')

    if data_map.ndim == 3 and data_map.shape[2] == 3: # color data_map
        color = True
    elif data_map.ndim == 2 or (data_map.ndim == 3 and data_map.shape[2] == 1): # greyscale
        color = False
    else:
        raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.')

    flipped = np.flipud(data_map)

    endian = flipped.dtype.byteorder
    # '=' means native byte order, so consult the platform in that case.
    if endian == '<' or (endian == '=' and sys.byteorder == 'little'):
        scale = -scale

    tag = 'PF\n' if color else 'Pf\n'
    dims = '%d %d\n' % (flipped.shape[1], flipped.shape[0])
    scale_line = '%f\n' % scale

    with open(pfm_file, 'wb') as f:
        f.write(tag.encode('iso8859-15'))
        f.write(dims.encode('iso8859-15'))
        f.write(scale_line.encode('iso8859-15'))
        # ndarray.tostring() is deprecated/removed; tobytes() is its replacement.
        f.write(flipped.tobytes())