-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from hugoycj/main
Implement torch hub loader for DSINE
- Loading branch information
Showing
2 changed files
with
147 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
import torch | ||
import os | ||
from typing import Optional | ||
from torchvision import transforms | ||
import numpy as np | ||
import torch.nn.functional as F | ||
|
||
dependencies = ["torch", "numpy", "geffnet"] | ||
def _load_state_dict(local_file_path: Optional[str] = None): | ||
if local_file_path is not None and os.path.exists(local_file_path): | ||
# Load state_dict from local file | ||
state_dict = torch.load(local_file_path, map_location=torch.device("cpu")) | ||
else: | ||
# Load state_dict from the default URL | ||
file_name = "dsine.pt" | ||
url = f"https://huggingface.co/camenduru/DSINE/resolve/main/dsine.pt" | ||
state_dict = torch.hub.load_state_dict_from_url(url, file_name=file_name, map_location=torch.device("cpu")) | ||
|
||
return state_dict['model'] | ||
|
||
class Predictor: | ||
def __init__(self, model) -> None: | ||
from models.dsine import DSINE | ||
self.device = torch.device('cuda') | ||
self.model = model | ||
self.transform = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) | ||
|
||
def infer_cv2(self, image): | ||
import cv2 | ||
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) | ||
return self.infer_pil(image) | ||
|
||
def infer_pil(self, img, intrins=None): | ||
import utils.utils as utils | ||
img = np.array(img).astype(np.float32) / 255.0 | ||
img = torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0).to(self.device) | ||
_, _, orig_H, orig_W = img.shape | ||
|
||
# zero-pad the input image so that both the width and height are multiples of 32 | ||
l, r, t, b = utils.pad_input(orig_H, orig_W) | ||
img = F.pad(img, (l, r, t, b), mode="constant", value=0.0) | ||
img = self.transform(img) | ||
|
||
if intrins is None: | ||
intrins = utils.get_intrins_from_fov(new_fov=60.0, H=orig_H, W=orig_W, device=self.device).unsqueeze(0) | ||
|
||
intrins[:, 0, 2] += l | ||
intrins[:, 1, 2] += t | ||
|
||
with torch.no_grad(): | ||
pred_norm = self.model(img, intrins=intrins)[-1] | ||
pred_norm = pred_norm[:, :, t:t+orig_H, l:l+orig_W] | ||
|
||
# pred_norm_np = pred_norm.cpu().detach().numpy()[0,:,:,:].transpose(1, 2, 0) # (H, W, 3) | ||
return pred_norm | ||
|
||
def DSINE(local_file_path: Optional[str] = None): | ||
from models import dsine | ||
|
||
state_dict = _load_state_dict(local_file_path) | ||
model = dsine.DSINE() | ||
model.load_state_dict(state_dict, strict=True) | ||
model.eval() | ||
model = model.to(torch.device("cuda")) | ||
model.pixel_coords = model.pixel_coords.to(torch.device("cuda")) | ||
|
||
return Predictor(model) | ||
|
||
|
||
def _test_run(): | ||
import argparse | ||
import torch.nn.functional as F | ||
import numpy as np | ||
|
||
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) | ||
parser.add_argument("--input", "-i", type=str, required=True, help="input image file") | ||
parser.add_argument("--output", "-o", type=str, required=True, help="output image file") | ||
parser.add_argument("--remote", action="store_true", help="use remote repo") | ||
parser.add_argument("--reload", action="store_true", help="reload remote repo") | ||
parser.add_argument("--pil", action="store_true", help="use PIL instead of OpenCV") | ||
args = parser.parse_args() | ||
|
||
if not args.remote: | ||
predictor = torch.hub.load(".", "DSINE", local_file_path='./checkpoints/dsine.pt', | ||
source="local", trust_repo=True) | ||
else: | ||
predictor = torch.hub.load(".", "DSINE", | ||
source="local", trust_repo=True) | ||
|
||
if args.pil: | ||
import PIL | ||
import torchvision.transforms.functional as TF | ||
|
||
image = PIL.Image.open(args.input).convert("RGB") | ||
h, w = image.height, image.width | ||
with torch.inference_mode(): | ||
normal = predictor.infer_pil(image)[0] # (H, W, 3) | ||
normal = (normal + 1) / 2 | ||
|
||
normal = TF.to_pil_image(normal.cpu()) | ||
normal.save(args.output) | ||
|
||
else: | ||
import cv2 | ||
image = cv2.imread(args.input, cv2.IMREAD_COLOR) | ||
h, w = image.shape[:2] | ||
with torch.inference_mode(): | ||
normal = predictor.infer_cv2(image)[0] # (H, W, 3) | ||
normal = (normal + 1) / 2 | ||
|
||
normal = (normal * 255).cpu().numpy().astype(np.uint8).transpose(1, 2, 0) | ||
normal = cv2.cvtColor(normal, cv2.COLOR_RGB2BGR) | ||
cv2.imwrite(args.output, normal) | ||
|
||
|
||
if __name__ == "__main__": | ||
_test_run() |