Skip to content

IO

Pose File Formats

Please see the resources page on pose file formats to learn more about the expected/supported formats commonly used in this library.

A suite of common input/output functions.

This module includes several functions for reading and writing different types of data useful for computer vision applications.

This module contains the following functions:

  • read_cams_sfm(camera_path, extension) - Reads an entire directory of camera files in SFM format.
  • read_cams_trajectory(log_file) - Reads camera file in Trajectory File format.
  • read_extrinsics_tum(tum_file, key_frames) - Reads extrinsic camera trajectories in TUM format [timestamp tx ty tz qx qy qz qw].
  • read_matrix(mat_file) - Reads a single matrix of float values from a file.
  • read_mesh(mesh_file) - Reads a mesh from a file.
  • read_cluster_list(filename) - Reads a cluster list file encoding supporting camera viewpoints.
  • read_pfm(pfm_file) - Reads a file in *.pfm format.
  • read_point_cloud(point_cloud_file) - Reads a point cloud from a file.
  • read_single_cam_sfm(cam_file, depth_planes) - Reads a single camera file in SFM format.
  • read_stereo_intrinsics_yaml(intrinsics_file) - Reads intrinsics information for a stereo camera pair from a *.yaml file.
  • write_cam_sfm(cam_file, cam) - Writes intrinsic and extrinsic camera parameters to a file in SFM format.
  • write_matrix(M, mat_file) - Writes a single matrix to a file.
  • write_mesh(mesh_file, mesh) - Writes a mesh to a file.
  • write_pfm(pfm_file, data_map, scale) - Writes a data map to a file in *.pfm format.

load_pretrained_model(model, ckpt)

Loads model weights from disk.

Source code in cvt/io.py
450
451
452
453
454
455
456
457
458
459
def load_pretrained_model(model, ckpt):
    """Loads model weights from disk.

    Parameters:
        model: Network whose ``load_state_dict`` method receives the loaded weights.
        ckpt: Path of the checkpoint file handed to ``torch.load``.

    Raises:
        SystemExit: With exit status 1 when the checkpoint cannot be read or
            does not fit the model.
    """
    print(f"Loading model from: {ckpt}...")
    try:
        state_dict = torch.load(ckpt)
        model.load_state_dict(state_dict)
    except Exception as e:
        print(e)
        print("Failed loading network weights...")
        # A bare sys.exit() reports status 0 (success) to the calling process;
        # a failed load must be visible to shells and job schedulers.
        sys.exit(1)

read_cams_sfm(camera_path, extension='cam.txt')

Reads an entire directory of camera files in SFM format.

Parameters:

Name Type Description Default
camera_path str

Path to the directory of camera files.

required
extension str

File extension being used for the camera files.

'cam.txt'

Returns:

Type Description
np.ndarray

Array of camera extrinsics, intrinsics, and view metadata (Nx2x4x4).

Source code in cvt/io.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def read_cams_sfm(camera_path: str, extension: str = "cam.txt") -> np.ndarray:
    """Reads an entire directory of camera files in SFM format.

    Files are visited in sorted filename order and each one is parsed with
    read_single_cam_sfm using 256 depth planes.

    Parameters:
        camera_path: Path to the directory of camera files.
        extension: File extension (filename suffix) being used for the camera files.

    Returns:
        Array of camera extrinsics, intrinsics, and view metadata (Nx2x4x4).
        The array is empty when no file in the directory matches the extension.
    """
    cams = []

    for cf in sorted(os.listdir(camera_path)):
        # Compare the full suffix: a fixed 7-character slice only matches
        # extensions that are exactly 7 characters long.
        if not cf.endswith(extension):
            continue

        cam_path = os.path.join(camera_path, cf)
        cams.append(read_single_cam_sfm(cam_path, 256))

    return np.asarray(cams)

read_cams_trajectory(log_file)

Reads camera file in Trajectory File format.

Parameters:

Name Type Description Default
log_file str

Input *.log file to be read.

required

Returns:

Type Description
List[np.ndarray]

List of inverted 4x4 camera extrinsic matrices, one per trajectory entry.

Source code in cvt/io.py
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def read_cams_trajectory(log_file: str) -> List[np.ndarray]:
    """Reads camera file in Trajectory File format.

    The file is consumed in records of five non-blank lines: one leading line
    that is skipped, followed by the four rows of a 4x4 matrix. Each matrix is
    inverted before being stored.

    Parameters:
        log_file: Input *.log file to be read.

    Returns:
        List with one inverted 4x4 matrix per complete record in the file.
    """
    cams = []

    with open(log_file, 'r') as f:
        # Blank lines hold no data and would shift the fixed 5-line stride.
        lines = [line for line in f if line.strip()]

    # Stopping at len(lines) - 4 skips a truncated trailing record instead of
    # indexing past the end of the file.
    for i in range(0, len(lines) - 4, 5):
        cam = np.zeros((4, 4))
        # read extrinsic
        for j in range(1, 5):
            cam[j - 1, :] = np.array([float(v) for v in lines[i + j].split()])
        cams.append(np.linalg.inv(cam))

    return cams

read_cluster_list(filename)

Reads a cluster list file encoding supporting camera viewpoints.

Parameters:

Name Type Description Default
filename str

Input file encoding per-camera viewpoints.

required

Returns:

Type Description
List[Tuple[int, List[int]]]

An array of tuples encoding (ref_view, [src_1,src_2,..])

Source code in cvt/io.py
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
def read_cluster_list(filename: str) -> List[Tuple[int,List[int]]]:
    """Parses a cluster list file into per-reference-view source view lists.

    The first line holds the number of views. Each view then contributes two
    lines: the reference view index, followed by a line whose tokens at odd
    positions (1, 3, 5, ...) are read as source view indices.

    Parameters:
        filename: Input file encoding per-camera viewpoints.

    Returns:
        A list of tuples encoding (ref_view, [src_1,src_2,..]); views without
        any source views are left out.
    """
    clusters = []
    with open(filename) as cluster_file:
        view_count = int(cluster_file.readline())

        for _ in range(view_count):
            reference = int(cluster_file.readline().rstrip())
            tokens = cluster_file.readline().rstrip().split()
            # Only every second token, starting at position 1, is a view index.
            sources = [int(token) for token in tokens[1::2]]
            if sources:
                clusters.append((reference, sources))
    return clusters

read_extrinsics_tum(tum_file, key_frames=None)

Reads extrinsic camera trajectories in TUM format [timestamp tx ty tz qx qy qz qw].

Parameters:

Name Type Description Default
tum_file str

Input extrinsics file.

required
key_frames List[int]

Indices corresponding to the desired keyframes.

None

Returns:

Type Description
np.ndarray

Array of camera extrinsics (Nx4x4).

Source code in cvt/io.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
def read_extrinsics_tum(tum_file: str, key_frames: List[int] = None) -> np.ndarray:
    """Reads extrinsic camera trajectories in TUM format [timestamp tx ty tz qx qy qz qw].

    Every pose line is converted to a 4x4 matrix holding the transposed
    rotation R^T and the translation -R^T t. For each key frame the pose is
    followed by 120 additional poses produced by y_axis_rotation, sweeping
    the angle 0 -> pi/3 -> -pi/3 -> 0.

    Parameters:
        tum_file: Input extrinsics file.
        key_frames: Line indices corresponding to the desired keyframes. When
            None, every line is treated as a key frame.

    Returns:
        Array of camera extrinsics (Nx4x4).
    """
    rot_interval = 30
    max_rot_angle = math.pi / 3

    # The sweep is identical for every key frame, so build it once.
    left = np.linspace(0.0, max_rot_angle, rot_interval)
    right = np.linspace(max_rot_angle, -max_rot_angle, rot_interval * 2)
    center = np.linspace(-max_rot_angle, 0.0, rot_interval)
    thetas = np.concatenate((left, right, center))

    extrinsics = []
    with open(tum_file, "r") as tf:
        for i, line in enumerate(tf):
            # split() without arguments tolerates tabs and repeated spaces.
            fields = line.split()
            if not fields:
                continue  # blank line: nothing to parse

            values = np.asarray(fields, dtype=float)
            t = values[1:4]  # values[0] is the timestamp
            q = values[4:]

            R = rot.from_quat(q).as_matrix().transpose()
            P = np.zeros((4, 4))
            P[:3, :3] = R
            P[:3, 3] = -R @ t
            P[3, 3] = 1

            extrinsics.append(P)

            # `is None` rather than `== None`: comparing a NumPy array with
            # `==` is element-wise and cannot be used as a single truth value.
            if key_frames is None or i in key_frames:
                for theta in thetas:
                    extrinsics.append(y_axis_rotation(P, theta))

    return np.asarray(extrinsics)

read_matrix(mat_file)

Reads a single matrix of float values from a file.

Parameters:

Name Type Description Default
mat_file str

Input file for the matrix to be read.

required

Returns:

Type Description
np.ndarray

The matrix stored in the given file.

Source code in cvt/io.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
def read_matrix(mat_file: str) -> np.ndarray:
    """Loads a whitespace-separated matrix of floats from a text file.

    Each line of the file becomes one row; every whitespace-separated token
    on the line is converted with float().

    Parameters:
        mat_file: Input file for the matrix to be read.

    Returns:
        The matrix stored in the given file.
    """
    with open(mat_file, 'r') as handle:
        rows = [
            [float(token) for token in text_line.split()]
            for text_line in handle.readlines()
        ]

    return np.array(rows)

read_mesh(mesh_file)

Reads a mesh from a file.

Parameters:

Name Type Description Default
mesh_file str

Input mesh file.

required

Returns:

Type Description
o3d.geometry.TriangleMesh

The mesh stored in the given file.

Source code in cvt/io.py
158
159
160
161
162
163
164
165
166
167
def read_mesh(mesh_file: str) -> o3d.geometry.TriangleMesh:
    """Loads a triangle mesh from disk.

    Thin wrapper around Open3D's ``o3d.io.read_triangle_mesh``.

    Parameters:
        mesh_file: Input mesh file.

    Returns:
        The mesh stored in the given file.
    """
    mesh = o3d.io.read_triangle_mesh(mesh_file)
    return mesh

read_pfm(pfm_file)

Reads a file in *.pfm format.

Parameters:

Name Type Description Default
pfm_file str

Input *.pfm file to be read.

required

Returns:

Type Description
np.ndarray

Data map that was stored in the *.pfm file.

Source code in cvt/io.py
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
def read_pfm(pfm_file: str) -> np.ndarray:
    """Reads a file in *.pfm format.

    The header consists of three text lines: the type tag ('PF' for three
    channels, 'Pf' for one), the width and height, and a scale value whose
    sign selects the byte order (negative means little-endian). The remaining
    bytes are 32-bit floats stored bottom row first.

    Parameters:
        pfm_file: Input *.pfm file to be read.

    Returns:
        Data map that was stored in the *.pfm file, as a native float32 array
        of shape (height, width) or (height, width, 3), flipped vertically so
        that the last stored row comes first.

    Raises:
        Exception: If the type tag is neither 'PF' nor 'Pf', or the dimension
            line is malformed.
    """
    with open(pfm_file, 'rb') as f:
        header = f.readline().decode('iso8859_15').rstrip()
        if header == 'PF':
            color = True
        elif header == 'Pf':
            color = False
        else:
            raise Exception('Not a PFM file.')

        dim_match = re.match(r'^(\d+)\s(\d+)\s$', f.readline().decode('iso8859_15'))
        if not dim_match:
            raise Exception('Malformed PFM header.')
        width, height = map(int, dim_match.groups())

        scale = float(f.readline().decode('iso8859_15').rstrip())
        # Only the sign of the scale is used: negative marks little-endian data.
        data_type = '<f' if scale < 0 else '>f'

        raw = f.read()

    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    data = np.frombuffer(raw, dtype=data_type)
    shape = (height, width, 3) if color else (height, width)
    data = np.reshape(data, shape)

    # frombuffer yields a read-only view in the file's byte order; copying the
    # vertically flipped view gives a writable array in native float32.
    return np.array(np.flipud(data), dtype=np.float32)

read_point_cloud(point_cloud_file)

Reads a point cloud from a file.

Parameters:

Name Type Description Default
point_cloud_file str

Input point cloud file.

required

Returns:

Type Description
o3d.geometry.PointCloud

The point cloud stored in the given file.

Source code in cvt/io.py
232
233
234
235
236
237
238
239
240
241
def read_point_cloud(point_cloud_file: str) -> o3d.geometry.PointCloud:
    """Loads a point cloud from disk.

    Thin wrapper around Open3D's ``o3d.io.read_point_cloud``.

    Parameters:
        point_cloud_file: Input point cloud file.

    Returns:
        The point cloud stored in the given file.
    """
    cloud = o3d.io.read_point_cloud(point_cloud_file)
    return cloud

read_single_cam_sfm(cam_file, depth_planes=256)

Reads a single camera file in SFM format.

Parameters:

Name Type Description Default
cam_file str

Input camera file to be read.

required
depth_planes int

Number of depth planes to store in the view metadata.

256

Returns:

Type Description
np.ndarray

Camera extrinsics, intrinsics, and view metadata (2x4x4).

Source code in cvt/io.py
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
def read_single_cam_sfm(cam_file: str, depth_planes: int = 256) -> np.ndarray:
    """Reads a single camera file in SFM format.

    The file is split into whitespace-separated words. Word 0 and word 17 are
    skipped (write_cam_sfm emits the labels 'extrinsic' and 'intrinsic'
    there), words 1-16 fill the 4x4 extrinsic matrix and words 18-26 the 3x3
    intrinsic matrix. Any words after that populate the last row of the
    second matrix:

        * 29 words: [w27, w28, depth_planes, w27 + w28 * depth_planes]
        * 30 words: [w27, w28, w29, w27 + w28 * w29]
        * 31 words: [w27, w28, w29, w30]
        * otherwise: [0, 0, 0, 1]

    Parameters:
        cam_file: Input camera file to be read.
        depth_planes: Number of depth planes to store in the view metadata
            when the file does not provide one.

    Returns:
        Camera extrinsics, intrinsics, and view metadata (2x4x4).
    """
    cam = np.zeros((2, 4, 4))

    with open(cam_file, 'r') as f:
        words = f.read().split()

    # read extrinsic (words 1..16, row-major)
    for i in range(4):
        for j in range(4):
            cam[0, i, j] = float(words[4 * i + j + 1])

    # read intrinsic (words 18..26, row-major)
    for i in range(3):
        for j in range(3):
            cam[1, i, j] = float(words[3 * i + j + 18])

    words_len = len(words)
    if words_len in (29, 30, 31):
        # Presumably the minimum depth and the depth interval -- the code only
        # shows that the fourth entry defaults to start + step * planes.
        depth_start = float(words[27])
        depth_step = float(words[28])
        planes = float(words[29]) if words_len >= 30 else depth_planes
        if words_len == 31:
            depth_end = float(words[30])
        else:
            depth_end = depth_start + depth_step * planes
        cam[1, 3, :] = (depth_start, depth_step, planes, depth_end)
    else:
        # No usable metadata: the row stays [0, 0, 0, 1].
        cam[1, 3, 3] = 1

    return cam

read_stereo_intrinsics_yaml(intrinsics_file)

Reads intrinsics information for a stereo camera pair from a *.yaml file.

Parameters:

Name Type Description Default
intrinsics_file str

Input *.yaml file storing the intrinsics information.

required

Returns:

Name Type Description
K_left np.ndarray

Intrinsics matrix (3x3) of left camera.

D_left np.ndarray

Distortion coefficients vector (1x4) of left camera.

K_right np.ndarray

Intrinsics matrix (3x3) of right camera.

D_right np.ndarray

Distortion coefficients vector (1x4) of right camera.

R np.ndarray

Relative rotation matrix (3x3) from left -> right cameras.

T np.ndarray

Relative translation vector (1x3) from left -> right cameras.

Source code in cvt/io.py
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
def read_stereo_intrinsics_yaml(intrinsics_file: str) -> Tuple[ np.ndarray, \
                                                                np.ndarray, \
                                                                np.ndarray, \
                                                                np.ndarray, \
                                                                np.ndarray, \
                                                                np.ndarray]:
    """Loads stereo calibration data from a *.yaml file via cv2.FileStorage.

    The file is expected to contain the nodes "left" and "right", each with
    child nodes "K" and "D", plus top-level nodes "R" and "T".

    Parameters:
        intrinsics_file: Input *.yaml file storing the intrinsics information.

    Returns:
        K_left: Intrinsics matrix (3x3) of left camera.
        D_left: Distortion coefficients vector (1x4) of left camera.
        K_right: Intrinsics matrix (3x3) of right camera.
        D_right: Distortion coefficients vector (1x4) of right camera.
        R: Relative rotation matrix (3x3) from left -> right cameras.
        T: Relative translation vector (1x3) from left -> right cameras.
        The six values are returned together in a list, in this order.
    """
    storage = cv2.FileStorage(intrinsics_file, cv2.FILE_STORAGE_READ)

    # Per-camera parameters live under the "left" / "right" nodes.
    left_node = storage.getNode("left")
    K_left, D_left = left_node.getNode("K").mat(), left_node.getNode("D").mat()

    right_node = storage.getNode("right")
    K_right, D_right = right_node.getNode("K").mat(), right_node.getNode("D").mat()

    # The relative pose between the two cameras is stored at the top level.
    R, T = storage.getNode("R").mat(), storage.getNode("T").mat()

    storage.release()

    return [K_left, D_left, K_right, D_right, R, T]

save_model(model, cfg, name='ckpt_model.pth')

Saves model weights to disk.

Source code in cvt/io.py
440
441
442
443
444
445
446
447
448
def save_model(model, cfg, name="ckpt_model.pth"):
    """Saves model weights to disk.

    Parameters:
        model: Network whose ``state_dict()`` is written with ``torch.save``.
        cfg: Configuration mapping; ``cfg["model"]["ckpt"]`` is the directory
            that receives the checkpoint and is created when missing.
        name: File name of the checkpoint inside that directory.
    """
    ckpt_path = cfg["model"]["ckpt"]
    os.makedirs(ckpt_path, exist_ok=True)
    print(f"Saving model checkpoint to {ckpt_path}...")
    # The checkpoint belongs in ckpt_path; the previous code referenced an
    # undefined name here and raised NameError on every call.
    model_path = os.path.join(ckpt_path, name)
    torch.save(model.state_dict(), model_path)

write_cam_sfm(cam_file, cam)

Writes intrinsic and extrinsic camera parameters to a file in sfm format.

Parameters:

Name Type Description Default
cam_file str

The file to be written to.

required
cam np.ndarray

Camera extrinsic and intrinsic data to be written.

required
Source code in cvt/io.py
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
def write_cam_sfm(cam_file: str, cam: np.ndarray) -> None:
    """Stores extrinsic and intrinsic camera parameters in an sfm-format text file.

    Layout: the label 'extrinsic' followed by the 4x4 block cam[0], a blank
    line, the label 'intrinsic' followed by the upper-left 3x3 block of
    cam[1], a blank line, and finally the four entries of cam[1][3] on a
    single line.

    Parameters:
        cam_file: The file to be written to.
        cam: Camera extrinsic and intrinsic data to be written.
    """
    with open(cam_file, "w") as out:
        out.write('extrinsic\n')
        for row in range(4):
            # Every value is followed by a space, including the last in a row.
            out.write(''.join(str(cam[0][row][col]) + ' ' for col in range(4)))
            out.write('\n')
        out.write('\n')

        out.write('intrinsic\n')
        for row in range(3):
            out.write(''.join(str(cam[1][row][col]) + ' ' for col in range(3)))
            out.write('\n')

        last_row = ' '.join(str(cam[1][3][col]) for col in range(4))
        out.write('\n' + last_row + '\n')

write_matrix(M, mat_file)

Writes a single matrix to a file.

Parameters:

Name Type Description Default
M np.ndarray

Matrix to be stored.

required
mat_file str

Output file where the given matrix is to be written.

required
Source code in cvt/io.py
361
362
363
364
365
366
367
368
369
370
371
372
def write_matrix(M: np.ndarray, mat_file: str) -> None:
    """Stores a matrix as text, one row per line.

    Every element is followed by a single space (including the last element
    of a row) and every row is terminated by a newline.

    Parameters:
        M: Matrix to be stored.
        mat_file: Output file where the given matrix is to be written.
    """
    with open(mat_file, "w") as out:
        for row in M:
            out.write("".join(f"{value} " for value in row))
            out.write("\n")

write_mesh(mesh_file, mesh)

Writes a mesh to a file.

Parameters:

Name Type Description Default
mesh_file str

Output mesh file.

required
mesh o3d.geometry.TriangleMesh

Mesh to be stored.

required
Source code in cvt/io.py
374
375
376
377
378
379
380
381
def write_mesh(mesh_file: str, mesh: o3d.geometry.TriangleMesh) -> None:
    """Stores a triangle mesh on disk.

    Thin wrapper around Open3D's ``o3d.io.write_triangle_mesh``; whatever that
    call returns is passed back to the caller.

    Parameters:
        mesh_file: Output mesh file.
        mesh: Mesh to be stored.
    """
    result = o3d.io.write_triangle_mesh(mesh_file, mesh)
    return result

write_pfm(pfm_file, data_map, scale=1.0)

Writes a data map to a file in *.pfm format.

Parameters:

Name Type Description Default
pfm_file str

Output *.pfm file to store the data map.

required
data_map np.ndarray

Data map to be stored.

required
scale float

Value used to scale the data map.

1.0
Source code in cvt/io.py
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
def write_pfm(pfm_file: str, data_map: np.ndarray, scale: float = 1.0) -> None:
    """Writes a data map to a file in *.pfm format.

    The file starts with three text lines -- the type tag ('PF' for H x W x 3
    data, 'Pf' otherwise), "<width> <height>", and the scale -- followed by
    the raw float32 values with the rows written in reverse (bottom-up) order.
    The scale is negated when the data is little-endian.

    Parameters:
        pfm_file: Output *.pfm file to store the data map.
        data_map: Data map to be stored. Must be float32 with shape
            H x W x 3, H x W x 1 or H x W.
        scale: Value written to the scale line of the header.

    Raises:
        Exception: If the dtype is not float32 or the shape is unsupported.
    """
    # Validate before opening the file so a rejected input does not leave an
    # empty or truncated file behind.
    if data_map.dtype.name != 'float32':
        raise Exception('Image dtype must be float32.')

    if len(data_map.shape) == 3 and data_map.shape[2] == 3: # color data_map
        color = True
    elif len(data_map.shape) == 2 or (len(data_map.shape) == 3 and data_map.shape[2] == 1): # greyscale
        color = False
    else:
        raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.')

    data_map = np.flipud(data_map)

    # A negative scale marks little-endian data; '=' means native byte order.
    endian = data_map.dtype.byteorder
    if endian == '<' or (endian == '=' and sys.byteorder == 'little'):
        scale = -scale

    header = 'PF\n' if color else 'Pf\n'
    header += '%d %d\n' % (data_map.shape[1], data_map.shape[0])
    header += '%f\n' % scale

    with open(pfm_file, 'wb') as f:
        f.write(header.encode('iso8859-15'))
        # tobytes() replaces ndarray.tostring(), which NumPy 2.0 removed.
        f.write(data_map.tobytes())