# utils.py
import os
import sys
import shutil
import inspect

import torch
from torchvision.transforms import Compose, ToTensor, Normalize
from huggingface_hub import hf_hub_download
from transformers import AutoModel

# helper to download a Hugging Face repo (file list driven by files.txt) for local use
def download(repo_id, path, HF_TOKEN=None):
    os.makedirs(path, exist_ok=True)
    files_path = os.path.join(path, 'files.txt')
    # fetch the manifest first so we know which files the repo ships
    if not os.path.exists(files_path):
        hf_hub_download(repo_id, 'files.txt', token=HF_TOKEN, local_dir=path, local_dir_use_symlinks=False)
    with open(files_path, 'r') as f:
        files = f.read().split('\n')
    # download every listed file plus the model/config/wrapper, skipping files already on disk
    for file in [f for f in files if f] + ['config.json', 'wrapper.py', 'model.safetensors']:
        full_path = os.path.join(path, file)
        if not os.path.exists(full_path):
            hf_hub_download(repo_id, file, token=HF_TOKEN, local_dir=path, local_dir_use_symlinks=False)

# helper to load a model from a local path with trust_remote_code enabled
def load_model_from_local_path(path, HF_TOKEN=None):
    cwd = os.getcwd()
    # temporarily chdir / extend sys.path so the repo's wrapper.py is importable
    os.chdir(path)
    sys.path.insert(0, path)
    try:
        model = AutoModel.from_pretrained(path, trust_remote_code=True, token=HF_TOKEN)
    finally:
        # restore the working directory and import path even if loading fails
        os.chdir(cwd)
        sys.path.pop(0)
    return model

# helper to download a repo by id (optionally forcing a fresh copy) and load the model
def load_model_by_repo_id(repo_id, save_path, HF_TOKEN=None, force_download=False):
    if force_download:
        if os.path.exists(save_path):
            shutil.rmtree(save_path)
    download(repo_id, save_path, HF_TOKEN)
    return load_model_from_local_path(save_path, HF_TOKEN)
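
# Example usage (a minimal sketch; the repo id and save path below are placeholders):
#
#   model = load_model_by_repo_id('some-org/some-fr-model', './checkpoints/some-fr-model',
#                                 HF_TOKEN=os.environ.get('HF_TOKEN'))
#
# force_download=True wipes save_path first, which is useful after a repo update.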

# convert a PIL image to a normalized (1, 3, H, W) tensor with values in [-1, 1]
def pil_to_input(pil_image):
    trans = Compose([ToTensor(), Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])
    return trans(pil_image).unsqueeze(0)
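
# Example (a sketch; assumes an RGB PIL image and `from PIL import Image`):
#
#   img = Image.open('face.jpg').convert('RGB')
#   x = pil_to_input(img)   # shape (1, 3, H, W), values in [-1, 1]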

# align both inputs, extract face embeddings, and return their cosine similarity
def compute_score(input1, input2, aligner, fr_model, return_bbox=False):
    aligned_x1, orig_pred_ldmks1, aligned_ldmks1, score1, thetas1, normalized_bbox1 = aligner(input1)
    aligned_x2, orig_pred_ldmks2, aligned_ldmks2, score2, thetas2, normalized_bbox2 = aligner(input2)
    # recognize: pass keypoints only if the recognition net accepts them
    input_signature = inspect.signature(fr_model.model.net.forward)
    if input_signature.parameters.get('keypoints') is not None:
        feat1 = fr_model(aligned_x1, aligned_ldmks1)
        feat2 = fr_model(aligned_x2, aligned_ldmks2)
    else:
        feat1 = fr_model(aligned_x1)
        feat2 = fr_model(aligned_x2)
    # compute cosine similarity between the two embeddings
    cossim = torch.nn.functional.cosine_similarity(feat1, feat2)
    if return_bbox:
        return cossim, normalized_bbox1, normalized_bbox2
    return cossim
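
# Example end-to-end identity check (a sketch; `aligner` and `fr_model` are
# placeholders for alignment and face-recognition models loaded via
# load_model_by_repo_id, and the image paths are hypothetical):
#
#   x1 = pil_to_input(Image.open('a.jpg').convert('RGB'))
#   x2 = pil_to_input(Image.open('b.jpg').convert('RGB'))
#   sim = compute_score(x1, x2, aligner, fr_model)   # scalar tensor in [-1, 1]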

# prompts used to drive the face edits
edit_prompts = {
    0: "Turn the person's hair pink",
    1: "Let the person turn bald",
    2: "Let the person have a tattoo",
    3: "Let the person wear purple makeup",
    4: "Let the person grow a mustache",
    5: "Turn the person into a zombie",
    6: "Change the skin color to Avatar blue",
    7: "Add elf-like ears",
    8: "Add large vampire fangs",
    9: "Apply Goth style makeup",
    10: "Let the person wear a police suit",
    11: "Let the person wear a bowtie",
    12: "Let the person wear a helmet",
    13: "Let the person wear sunglasses",
    14: "Let the person wear earrings",
    15: "Let the person smoke a cigar",
    16: "Place a headband in the hair",
    17: "Place a tiara on the top of the head",
    18: "Let it be snowy",
    19: "Change the background to a beach",
    20: "Add a city skyline background",
    21: "Add a forest background",
    22: "Change the background to a desert",
    23: "Set the background in a library",
    24: "Let the person stand under the moon"
}

# embed a batch of PIL images with CLIP
def get_image_embeddings(images, clip_processor, clip_model, device="cuda"):
    inputs = clip_processor(images=images, return_tensors="pt").to(device)
    with torch.no_grad():
        image_features = clip_model.get_image_features(**inputs)
    return image_features

# embed a batch of text prompts with CLIP
def get_text_embeddings(prompts, clip_processor, clip_model, device="cuda"):
    inputs = clip_processor(text=prompts, return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        text_features = clip_model.get_text_features(**inputs)
    return text_features
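
# Example usage (a minimal sketch; the checkpoint name is an assumption, any
# CLIP checkpoint exposing get_image_features/get_text_features works):
#
#   from transformers import CLIPModel, CLIPProcessor
#   clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to("cuda")
#   clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
#   img_feats = get_image_embeddings([pil_img], clip_processor, clip_model)
#   txt_feats = get_text_embeddings([edit_prompts[0]], clip_processor, clip_model)
#   # cosine similarity scores how well the image matches the edit prompt
#   sim = torch.nn.functional.cosine_similarity(img_feats, txt_feats)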