Merge branch 'main' into docker-build-workflow
sayakpaul authored Mar 4, 2024
2 parents fb08f3c + b9e1c30 commit 82e0f5b
Showing 33 changed files with 690 additions and 154 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/push_tests.yml
@@ -21,7 +21,7 @@ env:
jobs:
  setup_torch_cuda_pipeline_matrix:
    name: Setup Torch Pipelines CUDA Slow Tests Matrix
-    runs-on: docker-gpu
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
    container:
      image: diffusers/diffusers-pytorch-cpu # this is a CPU image, but we need it to fetch the matrix
      options: --shm-size "16gb" --ipc host
@@ -62,7 +62,6 @@ jobs:
    needs: setup_torch_cuda_pipeline_matrix
    strategy:
      fail-fast: false
-      max-parallel: 1
      matrix:
        module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
    runs-on: [single-gpu, nvidia-gpu, t4, ci]
29 changes: 29 additions & 0 deletions benchmarks/base_classes.py
@@ -236,6 +236,35 @@ def run_inference(self, pipe, args):
)


class IPAdapterTextToImageBenchmark(TextToImageBenchmark):
    url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/load_neg_embed.png"
    image = load_image(url)

    def __init__(self, args):
        pipe = self.pipeline_class.from_pretrained(args.ckpt, torch_dtype=torch.float16).to("cuda")
        pipe.load_ip_adapter(
            args.ip_adapter_id[0],
            subfolder="models" if "sdxl" not in args.ip_adapter_id[1] else "sdxl_models",
            weight_name=args.ip_adapter_id[1],
        )

        if args.run_compile:
            pipe.unet.to(memory_format=torch.channels_last)
            print("Run torch compile")
            pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)

        pipe.set_progress_bar_config(disable=True)
        self.pipe = pipe

    def run_inference(self, pipe, args):
        _ = pipe(
            prompt=PROMPT,
            ip_adapter_image=self.image,
            num_inference_steps=args.num_inference_steps,
            num_images_per_prompt=args.batch_size,
        )


class ControlNetBenchmark(TextToImageBenchmark):
pipeline_class = StableDiffusionControlNetPipeline
aux_network_class = ControlNetModel
32 changes: 32 additions & 0 deletions benchmarks/benchmark_ip_adapters.py
@@ -0,0 +1,32 @@
import argparse
import sys


sys.path.append(".")
from base_classes import IPAdapterTextToImageBenchmark # noqa: E402


IP_ADAPTER_CKPTS = {
    "runwayml/stable-diffusion-v1-5": ("h94/IP-Adapter", "ip-adapter_sd15.bin"),
    "stabilityai/stable-diffusion-xl-base-1.0": ("h94/IP-Adapter", "ip-adapter_sdxl.bin"),
}


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--ckpt",
        type=str,
        default="runwayml/stable-diffusion-v1-5",
        choices=list(IP_ADAPTER_CKPTS.keys()),
    )
    parser.add_argument("--batch_size", type=int, default=1)
    parser.add_argument("--num_inference_steps", type=int, default=50)
    parser.add_argument("--model_cpu_offload", action="store_true")
    parser.add_argument("--run_compile", action="store_true")
    args = parser.parse_args()

    args.ip_adapter_id = IP_ADAPTER_CKPTS[args.ckpt]
    benchmark_pipe = IPAdapterTextToImageBenchmark(args)
    args.ckpt = f"{args.ckpt} (IP-Adapter)"
    benchmark_pipe.benchmark(args)
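
For reference, the pipeline-level work this benchmark times looks roughly like the standalone sketch below, assembled from `IPAdapterTextToImageBenchmark` above. `AutoPipelineForText2Image` and the prompt are illustrative stand-ins for the benchmark's `pipeline_class` and `PROMPT` constant, which live in `base_classes.py`:

```py
import torch
from diffusers import AutoPipelineForText2Image
from diffusers.utils import load_image

pipe = AutoPipelineForText2Image.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")
# Same checkpoint/weight pairing as the SD 1.5 entry in IP_ADAPTER_CKPTS above.
pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")

image = load_image(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/load_neg_embed.png"
)
result = pipe(
    prompt="a photo of an astronaut riding a horse",  # placeholder prompt
    ip_adapter_image=image,
    num_inference_steps=50,
).images[0]
```
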
2 changes: 1 addition & 1 deletion benchmarks/run_all.py
@@ -72,7 +72,7 @@ def main():
                command += " --run_compile"
            run_command(command.split())

-        elif file == "benchmark_sd_inpainting.py":
+        elif file in ["benchmark_sd_inpainting.py", "benchmark_ip_adapters.py"]:
            sdxl_ckpt = "stabilityai/stable-diffusion-xl-base-1.0"
            command = f"python {file} --ckpt {sdxl_ckpt}"
            run_command(command.split())
8 changes: 6 additions & 2 deletions docs/source/en/tutorials/using_peft_for_inference.md
@@ -169,7 +169,7 @@ list_adapters_component_wise

If you want to compile your model with `torch.compile`, make sure to first fuse the LoRA weights into the base model and then unload them.

- ```py
+ ```diff
pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")

@@ -178,12 +178,16 @@ pipe.set_adapters(["pixel", "toy"], adapter_weights=[0.5, 1.0])
pipe.fuse_lora()
pipe.unload_lora_weights()

- pipe = torch.compile(pipe)
+ pipe.unet.to(memory_format=torch.channels_last)
+ pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)

prompt = "toy_face of a hacker with a hoodie, pixel art"
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
```

> [!TIP]
> You can refer to the `torch.compile()` section [here](https://huggingface.co/docs/diffusers/main/en/optimization/torch2.0#torchcompile) and [here](https://huggingface.co/docs/diffusers/main/en/tutorials/fast_diffusion#torchcompile) for more elaborate examples.
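
Put together, the fuse-then-compile flow from the snippet above might read as follows as a standalone script. This is a sketch assuming the SDXL base checkpoint used elsewhere in this tutorial, not verbatim library documentation:

```py
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

# Load and blend two LoRAs, then bake them into the base weights so that
# torch.compile sees a static graph with no adapter indirection.
pipe.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
pipe.load_lora_weights("CiroN2022/toy-face", weight_name="toy_face_sdxl.safetensors", adapter_name="toy")
pipe.set_adapters(["pixel", "toy"], adapter_weights=[0.5, 1.0])
pipe.fuse_lora()
pipe.unload_lora_weights()

# Compile only the UNet; channels_last memory format tends to help on CUDA.
pipe.unet.to(memory_format=torch.channels_last)
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)

prompt = "toy_face of a hacker with a hoodie, pixel art"
image = pipe(prompt, num_inference_steps=30, generator=torch.manual_seed(0)).images[0]
```
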
## Fusing adapters into the model

You can use PEFT to easily fuse/unfuse multiple adapters directly into the model weights (both UNet and text encoder) using the [`~diffusers.loaders.LoraLoaderMixin.fuse_lora`] method, which can lead to a speed-up in inference and lower VRAM usage.
Expand Down
37 changes: 37 additions & 0 deletions examples/dreambooth/README_sdxl.md
@@ -206,3 +206,40 @@ You can explore the results from a couple of our internal experiments by checkin
## Running on a free-tier Colab Notebook

Check out [this notebook](https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/SDXL_DreamBooth_LoRA_.ipynb).

## Conducting EDM-style training

It's now possible to perform EDM-style training as proposed in [Elucidating the Design Space of Diffusion-Based Generative Models](https://arxiv.org/abs/2206.00364).

For the SDXL model, simply set:

```diff
+ --do_edm_style_training \
```

Other SDXL-like models that use the EDM formulation, such as [playgroundai/playground-v2.5-1024px-aesthetic](https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic), can also be DreamBooth'd with the script. Below is an example command:

```bash
accelerate launch train_dreambooth_lora_sdxl.py \
--pretrained_model_name_or_path="playgroundai/playground-v2.5-1024px-aesthetic" \
--instance_data_dir="dog" \
--output_dir="dog-playground-lora" \
--mixed_precision="fp16" \
--instance_prompt="a photo of sks dog" \
--resolution=1024 \
--train_batch_size=1 \
--gradient_accumulation_steps=4 \
--learning_rate=1e-4 \
--use_8bit_adam \
--report_to="wandb" \
--lr_scheduler="constant" \
--lr_warmup_steps=0 \
--max_train_steps=500 \
--validation_prompt="A photo of sks dog in a bucket" \
--validation_epochs=25 \
--seed="0" \
--push_to_hub
```

> [!CAUTION]
> Min-SNR gamma is not supported with EDM-style training yet. When training with the PlaygroundAI model, it's recommended not to pass any `variant`.
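
On the mechanics: `--do_edm_style_training` swaps the usual epsilon-prediction objective for the σ-space preconditioning and weighted loss of the EDM paper. The following is a minimal sketch of that objective, not the training script's actual code; `denoiser` and its `(input, c_noise)` signature are hypothetical stand-ins for the UNet call the script routes through its scheduler:

```py
import torch


def edm_weighted_loss(denoiser, latents, sigma_data=0.5, p_mean=-1.2, p_std=1.2):
    # Sample noise levels from the log-normal distribution proposed in the paper.
    rnd = torch.randn(latents.shape[0], device=latents.device)
    sigma = (rnd * p_std + p_mean).exp().view(-1, 1, 1, 1)

    noisy = latents + sigma * torch.randn_like(latents)

    # EDM preconditioning keeps the network's input and target at roughly
    # unit variance across all noise levels.
    c_skip = sigma_data**2 / (sigma**2 + sigma_data**2)
    c_out = sigma * sigma_data / (sigma**2 + sigma_data**2).sqrt()
    c_in = 1 / (sigma**2 + sigma_data**2).sqrt()
    c_noise = sigma.flatten().log() / 4

    denoised = c_skip * noisy + c_out * denoiser(c_in * noisy, c_noise)

    # The lambda(sigma) weighting equalizes each noise level's loss contribution.
    weight = (sigma**2 + sigma_data**2) / (sigma * sigma_data) ** 2
    return (weight * (denoised - latents) ** 2).mean()
```
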
99 changes: 99 additions & 0 deletions examples/dreambooth/test_dreambooth_lora_edm.py
@@ -0,0 +1,99 @@
# coding=utf-8
# Copyright 2024 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import sys
import tempfile

import safetensors


sys.path.append("..")
from test_examples_utils import ExamplesTestsAccelerate, run_command # noqa: E402


logging.basicConfig(level=logging.DEBUG)

logger = logging.getLogger()
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)


class DreamBoothLoRASDXLWithEDM(ExamplesTestsAccelerate):
    def test_dreambooth_lora_sdxl_with_edm(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            test_args = f"""
                examples/dreambooth/train_dreambooth_lora_sdxl.py
                --pretrained_model_name_or_path hf-internal-testing/tiny-stable-diffusion-xl-pipe
                --do_edm_style_training
                --instance_data_dir docs/source/en/imgs
                --instance_prompt photo
                --resolution 64
                --train_batch_size 1
                --gradient_accumulation_steps 1
                --max_train_steps 2
                --learning_rate 5.0e-04
                --scale_lr
                --lr_scheduler constant
                --lr_warmup_steps 0
                --output_dir {tmpdir}
                """.split()

            run_command(self._launch_args + test_args)
            # save_pretrained smoke test
            self.assertTrue(os.path.isfile(os.path.join(tmpdir, "pytorch_lora_weights.safetensors")))

            # make sure the state_dict has the correct naming in the parameters.
            lora_state_dict = safetensors.torch.load_file(os.path.join(tmpdir, "pytorch_lora_weights.safetensors"))
            is_lora = all("lora" in k for k in lora_state_dict.keys())
            self.assertTrue(is_lora)

            # when not training the text encoder, all the parameters in the state dict should start
            # with `"unet"` in their names.
            starts_with_unet = all(key.startswith("unet") for key in lora_state_dict.keys())
            self.assertTrue(starts_with_unet)

    def test_dreambooth_lora_playground(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            test_args = f"""
                examples/dreambooth/train_dreambooth_lora_sdxl.py
                --pretrained_model_name_or_path hf-internal-testing/tiny-playground-v2-5-pipe
                --instance_data_dir docs/source/en/imgs
                --instance_prompt photo
                --resolution 64
                --train_batch_size 1
                --gradient_accumulation_steps 1
                --max_train_steps 2
                --learning_rate 5.0e-04
                --scale_lr
                --lr_scheduler constant
                --lr_warmup_steps 0
                --output_dir {tmpdir}
                """.split()

            run_command(self._launch_args + test_args)
            # save_pretrained smoke test
            self.assertTrue(os.path.isfile(os.path.join(tmpdir, "pytorch_lora_weights.safetensors")))

            # make sure the state_dict has the correct naming in the parameters.
            lora_state_dict = safetensors.torch.load_file(os.path.join(tmpdir, "pytorch_lora_weights.safetensors"))
            is_lora = all("lora" in k for k in lora_state_dict.keys())
            self.assertTrue(is_lora)

            # when not training the text encoder, all the parameters in the state dict should start
            # with `"unet"` in their names.
            starts_with_unet = all(key.startswith("unet") for key in lora_state_dict.keys())
            self.assertTrue(starts_with_unet)