Commit
·
3df6611
1
Parent(s):
6304c5b
Adding changes to speed up the diffusion process
Browse files- app.py +38 -15
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -67,11 +67,11 @@ def image_loss(images, loss_type, device, elastic_transformer):
|
|
| 67 |
else:
|
| 68 |
return torch.tensor(0.0).to(device)
|
| 69 |
|
| 70 |
-
# Update configuration
|
| 71 |
-
height, width =
|
| 72 |
-
guidance_scale =
|
| 73 |
-
num_inference_steps =
|
| 74 |
-
loss_scale =
|
| 75 |
|
| 76 |
def generate_images(prompt, concept):
|
| 77 |
global pipe, device, elastic_transformer
|
|
@@ -89,9 +89,10 @@ def generate_images(prompt, concept):
|
|
| 89 |
progress = gr.Progress()
|
| 90 |
|
| 91 |
for idx, loss_type in enumerate(loss_functions):
|
| 92 |
-
progress(idx/len(loss_functions), f"Generating {loss_type} image...")
|
| 93 |
-
|
| 94 |
try:
|
|
|
|
|
|
|
|
|
|
| 95 |
# Better memory management
|
| 96 |
if torch.cuda.is_available():
|
| 97 |
torch.cuda.empty_cache()
|
|
@@ -180,8 +181,30 @@ def generate_images(prompt, concept):
|
|
| 180 |
|
| 181 |
latents = latents.detach() - cond_grad * sigma**2
|
| 182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
latents = scheduler.step(noise_pred, t, latents).prev_sample
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
# Proper latent to image conversion
|
| 186 |
latents = (1 / 0.18215) * latents
|
| 187 |
with torch.no_grad():
|
|
@@ -230,7 +253,7 @@ def create_interface():
|
|
| 230 |
gr.Dropdown(choices=concepts, label="Select SD Concept")
|
| 231 |
],
|
| 232 |
outputs=gr.Gallery(
|
| 233 |
-
label="Generated Images",
|
| 234 |
show_label=True,
|
| 235 |
elem_id="gallery",
|
| 236 |
columns=5,
|
|
@@ -238,12 +261,12 @@ def create_interface():
|
|
| 238 |
height="auto"
|
| 239 |
),
|
| 240 |
title="Stable Diffusion using Text Inversion",
|
| 241 |
-
description="""Generate images using Stable Diffusion with different style concepts. The
|
| 242 |
-
1. Original Image (No Loss)
|
| 243 |
-
2. Blue Channel Loss -
|
| 244 |
-
3. Elastic Loss -
|
| 245 |
-
4. Symmetry Loss -
|
| 246 |
-
5. Saturation Loss -
|
| 247 |
|
| 248 |
Note: Image generation may take several minutes. Please be patient while the images are being processed.""",
|
| 249 |
cache_examples=False,
|
|
|
|
| 67 |
else:
|
| 68 |
return torch.tensor(0.0).to(device)
|
| 69 |
|
| 70 |
+
# Update configuration for faster generation
|
| 71 |
+
height, width = 384, 384 # Reduced from 512x512 to 384x384
|
| 72 |
+
guidance_scale = 7.5
|
| 73 |
+
num_inference_steps = 30
|
| 74 |
+
loss_scale = 150
|
| 75 |
|
| 76 |
def generate_images(prompt, concept):
|
| 77 |
global pipe, device, elastic_transformer
|
|
|
|
| 89 |
progress = gr.Progress()
|
| 90 |
|
| 91 |
for idx, loss_type in enumerate(loss_functions):
|
|
|
|
|
|
|
| 92 |
try:
|
| 93 |
+
# Add detailed progress reporting
|
| 94 |
+
progress(idx/len(loss_functions), f"Starting {loss_type} image generation...")
|
| 95 |
+
|
| 96 |
# Better memory management
|
| 97 |
if torch.cuda.is_available():
|
| 98 |
torch.cuda.empty_cache()
|
|
|
|
| 181 |
|
| 182 |
latents = latents.detach() - cond_grad * sigma**2
|
| 183 |
|
| 184 |
+
# Diffusion process with progress updates
|
| 185 |
+
for i, t in enumerate(scheduler.timesteps):
|
| 186 |
+
current_progress = (idx + (i / len(scheduler.timesteps))) / len(loss_functions)
|
| 187 |
+
progress(current_progress, f"Generating {loss_type} image: Step {i+1}/{len(scheduler.timesteps)}")
|
| 188 |
+
|
| 189 |
+
# Apply loss less frequently for speed
|
| 190 |
+
if loss_type != 'none' and i % 8 == 0: # Changed from 5 to 8
|
| 191 |
+
with torch.set_grad_enabled(True):
|
| 192 |
+
# Enable gradients for images
|
| 193 |
+
denoised_images = pipe.vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5
|
| 194 |
+
denoised_images = denoised_images.requires_grad_() # Enable gradients for images
|
| 195 |
+
loss = image_loss(denoised_images, loss_type, device, elastic_transformer)
|
| 196 |
+
cond_grad = torch.autograd.grad(loss * loss_scale, latents)[0]
|
| 197 |
+
|
| 198 |
+
latents = latents.detach() - cond_grad * sigma**2
|
| 199 |
+
|
| 200 |
latents = scheduler.step(noise_pred, t, latents).prev_sample
|
| 201 |
+
|
| 202 |
+
# Clear CUDA cache more efficiently
|
| 203 |
+
if torch.cuda.is_available() and i % 10 == 0:
|
| 204 |
+
torch.cuda.empty_cache()
|
| 205 |
+
|
| 206 |
+
progress(idx/len(loss_functions), f"Finalizing {loss_type} image...")
|
| 207 |
+
|
| 208 |
# Proper latent to image conversion
|
| 209 |
latents = (1 / 0.18215) * latents
|
| 210 |
with torch.no_grad():
|
|
|
|
| 253 |
gr.Dropdown(choices=concepts, label="Select SD Concept")
|
| 254 |
],
|
| 255 |
outputs=gr.Gallery(
|
| 256 |
+
label="Generated Images (From Left to Right: Original, Blue Channel, Elastic, Symmetry, Saturation)",
|
| 257 |
show_label=True,
|
| 258 |
elem_id="gallery",
|
| 259 |
columns=5,
|
|
|
|
| 261 |
height="auto"
|
| 262 |
),
|
| 263 |
title="Stable Diffusion using Text Inversion",
|
| 264 |
+
description="""Generate images using Stable Diffusion with different style concepts. The gallery shows 5 images in this order:
|
| 265 |
+
1. Left-most: Original Image (No Loss) - Base generation without modifications
|
| 266 |
+
2. Second: Blue Channel Loss - Enhanced blue tones for atmospheric effects
|
| 267 |
+
3. Middle: Elastic Loss - Added elastic deformation for artistic distortion
|
| 268 |
+
4. Fourth: Symmetry Loss - Enforced symmetrical features
|
| 269 |
+
5. Right-most: Saturation Loss - Modified color saturation for vibrant effects
|
| 270 |
|
| 271 |
Note: Image generation may take several minutes. Please be patient while the images are being processed.""",
|
| 272 |
cache_examples=False,
|
requirements.txt
CHANGED
|
@@ -4,4 +4,5 @@ transformers
|
|
| 4 |
gradio
|
| 5 |
torchvision
|
| 6 |
Pillow
|
| 7 |
-
scipy
|
|
|
|
|
|
| 4 |
gradio
|
| 5 |
torchvision
|
| 6 |
Pillow
|
| 7 |
+
scipy
|
| 8 |
+
accelerate
|