-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathloo.py
34 lines (27 loc) · 1.31 KB
/
loo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import torch
from torch import autocast
from PIL import Image
class StableDiffusionModel:
    """Skeleton text-to-image sampler built around a latent denoising loop.

    The class depends on collaborators that are not defined here and must be
    provided by a subclass or set on the instance: ``encode_text``,
    ``sample_latent_noise``, ``denoise``, ``step_scheduler`` and a
    ``scheduler`` object exposing ``timesteps``.
    """

    def generate_image(self, prompt, num_steps=50, guidance_scale=7.5, device=None):
        """Generate one image for *prompt* by iteratively denoising latents.

        Args:
            prompt: Text handed to ``self.encode_text``.
            num_steps: Requested number of denoising steps. It is applied via
                ``self.scheduler.set_timesteps`` when the scheduler offers
                such a method; otherwise the scheduler's current
                ``timesteps`` are used unchanged.
            guidance_scale: Classifier-free guidance weight. ``1.0`` disables
                guidance and skips the extra unconditional denoise pass.
            device: Device for the latents. Defaults to ``self.device`` when
                that attribute exists, else CUDA if available, else CPU.

        Returns:
            The result of ``self.decode_latents`` on the final latents.
        """
        # The original referenced an undefined global ``device``; resolve it
        # explicitly so the method no longer raises NameError.
        if device is None:
            device = getattr(self, "device", None)
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        device = torch.device(device)

        # ``num_steps`` used to be silently ignored.
        # NOTE(review): assumes a diffusers-style ``set_timesteps(n)`` - confirm
        # against the scheduler actually used.
        set_timesteps = getattr(self.scheduler, "set_timesteps", None)
        if callable(set_timesteps):
            set_timesteps(num_steps)

        # Encode the text into latent space
        text_embedding = self.encode_text(prompt)
        use_guidance = guidance_scale != 1.0
        # NOTE(review): the empty prompt is assumed to give the unconditional
        # embedding, as in the usual classifier-free guidance setup.
        uncond_embedding = self.encode_text("") if use_guidance else None

        # Sample random noise as the initial latent image
        latents = self.sample_latent_noise(batch_size=1, latent_shape=(4, 64, 64))
        latents = latents.to(device)

        # Inference only: avoid building an autograd graph across all steps.
        with torch.no_grad():
            # Loop over timesteps to gradually refine the image
            for timestep in self.scheduler.timesteps:
                # Mixed precision only where it applies; disabled cleanly
                # (no warning) when not running on CUDA.
                with autocast("cuda", enabled=device.type == "cuda"):
                    # Denoise step (text-conditioned prediction)
                    noise_pred = self.denoise(latents, text_embedding, timestep)
                    if use_guidance:
                        # Classifier-free guidance: extrapolate from the
                        # unconditional prediction toward the conditional
                        # one, rather than scaling the raw noise estimate.
                        noise_uncond = self.denoise(
                            latents, uncond_embedding, timestep
                        )
                        noise_pred = noise_uncond + guidance_scale * (
                            noise_pred - noise_uncond
                        )
                    # Update latent image (subtract noise)
                    latents = self.step_scheduler(noise_pred, timestep, latents)

        # Decode latents to pixel space (e.g., using VAE or other decoder)
        return self.decode_latents(latents)

    def decode_latents(self, latents):
        """Turn a latent tensor into a PIL image by rescaling [-1, 1] to [0, 255].

        No learned decoder is applied here; values are only clamped, rescaled
        and cast. A ``(1, C, H, W)`` batch is unpacked to ``(H, W, C)`` (or
        ``(H, W)`` when ``C == 1``); 2-D and 3-D inputs keep their layout.

        Raises:
            ValueError: If a 4-D input holds more than one sample.
        """
        return Image.fromarray(self._latents_to_uint8(latents))

    @staticmethod
    def _latents_to_uint8(latents):
        """Return *latents* as a uint8 NumPy array suitable for ``Image.fromarray``."""
        # ``float()`` makes half/bfloat16 tensors convertible to NumPy.
        values = latents.detach().float().cpu()
        if values.dim() == 4:
            if values.shape[0] != 1:
                raise ValueError(
                    "decode_latents expects a single sample, got batch of "
                    f"{values.shape[0]}"
                )
            # Drop the batch axis and move channels last, as PIL requires.
            values = values[0].permute(1, 2, 0).contiguous()
            if values.shape[-1] == 1:
                values = values[..., 0]
        # Clamp first: out-of-range values would otherwise wrap around (or be
        # undefined) in the uint8 cast.
        values = values.clamp(-1.0, 1.0)
        return ((values + 1.0) * 127.5).numpy().astype("uint8")