Spaces:
Runtime error
Runtime error
Commit
·
183c72e
1
Parent(s):
09f3d43
remove
Browse files
README.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
emoji: 🎨
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: pink
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Inversion-InstantStyle
|
| 3 |
emoji: 🎨
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: pink
|
app.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import os
|
| 2 |
import random
|
| 3 |
-
from typing import Optional
|
| 4 |
|
| 5 |
import gradio as gr
|
| 6 |
import numpy as np
|
|
@@ -9,16 +8,16 @@ import spaces
|
|
| 9 |
import torch
|
| 10 |
from diffusers import (AutoencoderKL, DDIMInverseScheduler, DDIMScheduler,
|
| 11 |
StableDiffusionXLPipeline)
|
| 12 |
-
from torchvision.transforms import ToTensor
|
| 13 |
|
| 14 |
# pyright: reportPrivateImportUsage=false
|
| 15 |
|
| 16 |
|
| 17 |
DESCRIPTION = f"""
|
| 18 |
-
# 🎨
|
| 19 |
This is an interactive demo of noisy DDIM inversion capabilities on top of Instant-Style styling method
|
| 20 |
|
| 21 |
-
This method proposed
|
| 22 |
|
| 23 |
A style benchmark : [style-bench](https://gojasper.github.io/style-bench) was also provided to facilitate evaluation of diffusion models for styling purposes.
|
| 24 |
"""
|
|
@@ -54,7 +53,7 @@ if gr.NO_RELOAD:
|
|
| 54 |
pipe.load_ip_adapter(
|
| 55 |
"h94/IP-Adapter",
|
| 56 |
subfolder="sdxl_models",
|
| 57 |
-
weight_name="ip-adapter_sdxl.safetensors"
|
| 58 |
)
|
| 59 |
pipe.to(device)
|
| 60 |
|
|
@@ -88,7 +87,7 @@ def img_to_latents(x: torch.Tensor, vae: AutoencoderKL):
|
|
| 88 |
return latents
|
| 89 |
|
| 90 |
|
| 91 |
-
def invert_image(model, image: np.ndarray, n_steps: int, width:int, height:int):
|
| 92 |
|
| 93 |
model.scheduler = invert_scheduler
|
| 94 |
|
|
@@ -128,7 +127,7 @@ def generate(
|
|
| 128 |
height: int = 1024,
|
| 129 |
guidance_scale_base: float = 5.0,
|
| 130 |
num_inference_steps_base: int = 25,
|
| 131 |
-
style_image_value
|
| 132 |
noise_scale: float = 1.5,
|
| 133 |
) -> PIL.Image.Image:
|
| 134 |
torch.manual_seed(seed)
|
|
@@ -146,18 +145,15 @@ def generate(
|
|
| 146 |
# Invert the image and get the latent
|
| 147 |
if style_image_value is not None:
|
| 148 |
latent = invert_image(pipe, style_image_value, 30, width, height)
|
| 149 |
-
print("Image was inverted")
|
| 150 |
-
print(latent)
|
| 151 |
|
| 152 |
latent = latent + noise_scale * noise
|
| 153 |
-
latent = latent / torch.sqrt(
|
|
|
|
|
|
|
| 154 |
|
| 155 |
else:
|
| 156 |
latent = noise
|
| 157 |
|
| 158 |
-
print("Noise added")
|
| 159 |
-
print(latent)
|
| 160 |
-
|
| 161 |
scale = {
|
| 162 |
"up": {"block_0": [0.0, 1.0, 0.0]},
|
| 163 |
}
|
|
|
|
| 1 |
import os
|
| 2 |
import random
|
|
|
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
import numpy as np
|
|
|
|
| 8 |
import torch
|
| 9 |
from diffusers import (AutoencoderKL, DDIMInverseScheduler, DDIMScheduler,
|
| 10 |
StableDiffusionXLPipeline)
|
| 11 |
+
from torchvision.transforms import ToTensor
|
| 12 |
|
| 13 |
# pyright: reportPrivateImportUsage=false
|
| 14 |
|
| 15 |
|
| 16 |
DESCRIPTION = f"""
|
| 17 |
+
# 🎨 Inversion-InstantStyle 🎨
|
| 18 |
This is an interactive demo of noisy DDIM inversion capabilities on top of Instant-Style styling method
|
| 19 |
|
| 20 |
+
This method is proposed by *Eyal Benaroche, Clément Chadebec, Onur Tasar, and Benjamin Aubin* from Jasper Research in the context of Eyal's internship with Ecole Polytechnique.
|
| 21 |
|
| 22 |
A style benchmark : [style-bench](https://gojasper.github.io/style-bench) was also provided to facilitate evaluation of diffusion models for styling purposes.
|
| 23 |
"""
|
|
|
|
| 53 |
pipe.load_ip_adapter(
|
| 54 |
"h94/IP-Adapter",
|
| 55 |
subfolder="sdxl_models",
|
| 56 |
+
weight_name="ip-adapter_sdxl.safetensors",
|
| 57 |
)
|
| 58 |
pipe.to(device)
|
| 59 |
|
|
|
|
| 87 |
return latents
|
| 88 |
|
| 89 |
|
| 90 |
+
def invert_image(model, image: np.ndarray, n_steps: int, width: int, height: int):
|
| 91 |
|
| 92 |
model.scheduler = invert_scheduler
|
| 93 |
|
|
|
|
| 127 |
height: int = 1024,
|
| 128 |
guidance_scale_base: float = 5.0,
|
| 129 |
num_inference_steps_base: int = 25,
|
| 130 |
+
style_image_value=None,
|
| 131 |
noise_scale: float = 1.5,
|
| 132 |
) -> PIL.Image.Image:
|
| 133 |
torch.manual_seed(seed)
|
|
|
|
| 145 |
# Invert the image and get the latent
|
| 146 |
if style_image_value is not None:
|
| 147 |
latent = invert_image(pipe, style_image_value, 30, width, height)
|
|
|
|
|
|
|
| 148 |
|
| 149 |
latent = latent + noise_scale * noise
|
| 150 |
+
latent = latent / torch.sqrt(
|
| 151 |
+
torch.tensor(1 + noise_scale**2).to(device, dtype=torch.float16)
|
| 152 |
+
)
|
| 153 |
|
| 154 |
else:
|
| 155 |
latent = noise
|
| 156 |
|
|
|
|
|
|
|
|
|
|
| 157 |
scale = {
|
| 158 |
"up": {"block_0": [0.0, 1.0, 0.0]},
|
| 159 |
}
|