(1) add support for ScanNet. (2) Visualize depths during testing.
1 parent 041e36f · commit fed4664 · 8 changed files with 194 additions and 36 deletions.
@@ -0,0 +1,22 @@
# ScanNet Instructions

I personally found it a bit tricky to set up the ScanNet dataset the first time I tried it, so I am compiling some notes on how to do it in case someone else finds them useful.

### 1. Dataset download

To download the ScanNet data and its labels, follow the instructions [here](https://github.com/ScanNet/ScanNet): fill out the ScanNet Terms of Use agreement and email it to [scannet@googlegroups.com](mailto:scannet@googlegroups.com). You will receive a download link to the dataset; download it and unzip it.

### 2. Use [SensReader](https://github.com/ScanNet/ScanNet/tree/master/SensReader/python) to extract RGB-D and camera data
Use the `reader.py` script as follows for each scene you want to work with:
```
python reader.py --filename [.sens file to export data from] --output_path [output directory to export data to]
Options:
--export_depth_images: export all depth frames as 16-bit pngs (depth shift 1000)
--export_color_images: export all color frames as 8-bit rgb jpgs
--export_poses: export all camera poses (4x4 matrix, camera to world)
--export_intrinsics: export camera intrinsics (4x4 matrix)
```
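
Note that the exported depth frames are 16-bit PNGs with a depth shift of 1000, i.e. each pixel stores depth in millimetres. A minimal sketch of reading one back in metres (the file path is illustrative, and `imageio`/`numpy` are assumed to be installed):
```
import imageio
import numpy as np

# Exported depth PNGs are uint16 with a depth shift of 1000,
# i.e. values are in millimetres; 0 means no valid depth.
depth_mm = imageio.imread("scene0000_00_out/depth/0.png")
depth_m = depth_mm.astype(np.float32) / 1000.0
```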

### 3. Convert the extracted data to NeRF-style format
Use this [script](https://github.com/zju3dv/object_nerf/blob/main/data_preparation/scannet_sens_reader/convert_to_nerf_style_data.py) to convert the data to NeRF-style format; for instructions, see Step 1 [here](https://github.com/zju3dv/object_nerf/tree/main/data_preparation).
1. The generated `transforms_xxx.json` stores camera-to-world transformation matrices in the SLAM / OpenCV convention (xyz -> right, down, forward). You need to convert them to the NeRF convention (xyz -> right, up, back) in the dataloader before training; see the sketch after this list.
2. For example, see the conversion done [here](https://github.com/cvg/nice-slam/blob/7af15cc33729aa5a8ca052908d96f495e34ab34c/src/utils/datasets.py#L205).
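
As a concrete illustration, the dataloader added in this commit applies exactly this flip to each `transform_matrix`. A minimal sketch (the helper name `opencv_to_nerf` is illustrative, not part of the commit):
```
import numpy as np

def opencv_to_nerf(pose: np.ndarray) -> np.ndarray:
    """Convert a 4x4 camera-to-world matrix from the OpenCV convention
    (x right, y down, z forward) to the NeRF convention (x right, y up, z back)."""
    pose = pose.copy()
    pose[:3, 1] *= -1  # flip the camera y-axis (down -> up)
    pose[:3, 2] *= -1  # flip the camera z-axis (forward -> back)
    return pose
```
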
@@ -0,0 +1,14 @@
expname = scannet_scene0000_00
basedir = ./logs
datadir = /work/yashsb/datasets/ScanNet/
dataset_type = scannet

no_batching = False

use_viewdirs = True
white_bkgd = False
lrate_decay = 500

N_samples = 64
N_importance = 128
N_rand = 1024
@@ -0,0 +1,107 @@
import os
import torch
import numpy as np
import imageio
import json
import torch.nn.functional as F
import cv2
import pyvista as pv


# Homogeneous 4x4 building blocks for the spherical render path:
# translation along z, rotation about x (phi), rotation about y (theta).
trans_t = lambda t: torch.Tensor([
    [1, 0, 0, 0],
    [0, 1, 0, 0],
    [0, 0, 1, t],
    [0, 0, 0, 1]]).float()

rot_phi = lambda phi: torch.Tensor([
    [1, 0, 0, 0],
    [0, np.cos(phi), -np.sin(phi), 0],
    [0, np.sin(phi),  np.cos(phi), 0],
    [0, 0, 0, 1]]).float()

rot_theta = lambda th: torch.Tensor([
    [np.cos(th), 0, -np.sin(th), 0],
    [0, 1, 0, 0],
    [np.sin(th), 0,  np.cos(th), 0],
    [0, 0, 0, 1]]).float()


def pose_spherical(theta, phi, radius):
    # Camera-to-world matrix on a sphere of the given radius, looking at the
    # origin; used to generate the novel-view render path.
    c2w = trans_t(radius)
    c2w = rot_phi(phi / 180. * np.pi) @ c2w
    c2w = rot_theta(theta / 180. * np.pi) @ c2w
    c2w = torch.Tensor(np.array([[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]])) @ c2w
    return c2w


def load_scannet_data(basedir, sceneID, half_res=False, trainskip=10, testskip=1):
    '''
    basedir is something like: "/work/yashsb/datasets/ScanNet/"
    '''
    scansdir = os.path.join(basedir, "scans")
    basedir = os.path.join(basedir, "nerfstyle_" + sceneID)

    splits = ['train', 'val', 'test']
    metas = {}
    for s in splits:
        with open(os.path.join(basedir, 'transforms_{}.json'.format(s)), 'r') as fp:
            metas[s] = json.load(fp)

    all_imgs = []
    all_poses = []
    counts = [0]
    for s in splits:
        meta = metas[s]
        imgs = []
        poses = []
        # ScanNet scenes have thousands of frames; subsample them.
        if s == 'train':
            skip = trainskip
        else:
            skip = testskip

        for frame in meta['frames'][::skip]:
            fname = os.path.join(basedir, frame['file_path'] + '.png')
            imgs.append(imageio.imread(fname))
            pose = np.array(frame['transform_matrix'])

            ### NEED to do this because ScanNet uses the OpenCV convention
            ### (x right, y down, z forward): flip y and z to get the NeRF
            ### convention (x right, y up, z back).
            pose[:3, 1] *= -1
            pose[:3, 2] *= -1

            poses.append(pose)

        imgs = (np.array(imgs) / 255.).astype(np.float32)  # normalize pixel values to [0, 1]
        poses = np.array(poses).astype(np.float32)
        counts.append(counts[-1] + imgs.shape[0])
        all_imgs.append(imgs)
        all_poses.append(poses)

    i_split = [np.arange(counts[i], counts[i + 1]) for i in range(3)]

    imgs = np.concatenate(all_imgs, 0)
    poses = np.concatenate(all_poses, 0)

    H, W = imgs[0].shape[:2]
    camera_angle_x = float(meta['camera_angle_x'])
    focal = .5 * W / np.tan(.5 * camera_angle_x)

    # Poses for a spherical render path around the scene.
    render_poses = torch.stack([pose_spherical(angle, -30.0, 4.0) for angle in np.linspace(-180, 180, 40 + 1)[:-1]], 0)

    if half_res:
        H = H // 2
        W = W // 2
        focal = focal / 2.

        imgs_half_res = np.zeros((imgs.shape[0], H, W, 3))
        for i, img in enumerate(imgs):
            imgs_half_res[i] = cv2.resize(img, (W, H), interpolation=cv2.INTER_AREA)
        imgs = imgs_half_res
        # imgs = tf.image.resize_area(imgs, [400, 400]).numpy()

    ## getting an approximate bounding box for the scene
    # load the scene mesh shipped with the raw ScanNet scan
    mesh = pv.read(os.path.join(scansdir, sceneID, f"{sceneID}_vh_clean.ply"))
    # axis-aligned bounding box (min corner, max corner), padded by 1 on each side
    bounding_box = torch.tensor(mesh.bounds[::2]) - 1, torch.tensor(mesh.bounds[1::2]) + 1

    return imgs, poses, render_poses, [H, W, focal], i_split, bounding_box
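
For reference, a minimal sketch of how this loader might be called from a training script; the module name, paths, and scene ID below are illustrative assumptions, not part of the commit:
```
from load_scannet import load_scannet_data  # assumed module name for the file above

imgs, poses, render_poses, hwf, i_split, bounding_box = load_scannet_data(
    basedir="/work/yashsb/datasets/ScanNet/",  # must contain scans/ and nerfstyle_<sceneID>/
    sceneID="scene0000_00",
    half_res=True,    # halve H, W and the focal length
    trainskip=10,     # keep every 10th training frame
    testskip=1,
)
i_train, i_val, i_test = i_split
H, W, focal = hwf
```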