Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -10,6 +10,7 @@ import warnings
 import time
 import gc
 import uuid
+from tqdm import tqdm
 
 import cv2
 import numpy as np
@@ -189,24 +190,32 @@ def interpolate_bits(frames_np, multiplier=2, scale=1.0):
     # Process Frames
     # Load first frame into GPU
     I1 = to_tensor(frames_np[0])
 
-    for i in range(T - 1):
-        I0 = I1
-        # Add original frame to output
-        output_frames.append(from_tensor(I0))
-
-        # Load next frame
-        I1 = to_tensor(frames_np[i+1])
-
-        mid_tensors = make_inference(I0, I1, n_interp)
-
-        for mid in mid_tensors:
-            output_frames.append(from_tensor(mid))
-
-    output_frames.append(from_tensor(I1))
+    total_steps = T - 1
+
+    with tqdm(total=total_steps, desc="Interpolating", unit="frame") as pbar:
+
+        for i in range(total_steps):
+            I0 = I1
+            # Add original frame to output
+            output_frames.append(from_tensor(I0))
+
+            # Load next frame
+            I1 = to_tensor(frames_np[i+1])
+
+            # Generate intermediate frames
+            mid_tensors = make_inference(I0, I1, n_interp)
+
+            # Append intermediate frames
+            for mid in mid_tensors:
+                output_frames.append(from_tensor(mid))
+
+            if (i + 1) % 50 == 0:
+                pbar.update(50)
+        pbar.update(total_steps % 50)
+
+    # Add the very last frame
+    output_frames.append(from_tensor(I1))
 
     # Cleanup
     del I0, I1, mid_tensors
@@ -349,8 +358,9 @@ def get_inference_duration(
     if guidance_scale > 1:
         gen_time = gen_time * 1.8
 
-    if frame_multiplier > 1:
-        total_out_frames = (num_frames * frame_multiplier) - num_frames
+    frame_factor = frame_multiplier // FIXED_FPS
+    if frame_factor > 1:
+        total_out_frames = (num_frames * frame_factor) - num_frames
         inter_time = (total_out_frames * 0.02)
         print(inter_time)
         gen_time += inter_time
@@ -410,23 +420,27 @@ def run_inference(
     raw_frames_np = result.frames[0]  # Returns (T, H, W, C) float32
     pipe.scheduler = original_scheduler
 
-    if frame_multiplier > 1:
+    frame_factor = frame_multiplier // FIXED_FPS
+    if frame_factor > 1:
         start = time.time()
-        print(f"Processing frames (RIFE Multiplier: {frame_multiplier}x)...")
+        print(f"Processing frames (RIFE Multiplier: {frame_factor}x)...")
         rife_model.device()
         rife_model.flownet = rife_model.flownet.half()
-        final_frames = interpolate_bits(raw_frames_np, multiplier=int(frame_multiplier))
+        final_frames = interpolate_bits(raw_frames_np, multiplier=int(frame_factor))
         print("Interpolation time passed:", time.time() - start)
     else:
         final_frames = list(raw_frames_np)
 
-    final_fps = FIXED_FPS * int(frame_multiplier)
+    final_fps = FIXED_FPS * int(frame_factor)
 
     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
         video_path = tmpfile.name
 
     start = time.time()
-    export_to_video(final_frames, video_path, fps=final_fps, quality=quality)
+    with tqdm(total=3, desc="Rendering Media", unit="clip") as pbar:
+        pbar.update(2)
+        export_to_video(final_frames, video_path, fps=final_fps, quality=quality)
+        pbar.update(1)
     print(f"Export time passed, {final_fps} FPS:", time.time() - start)
 
     return video_path, task_name
@@ -446,7 +460,7 @@ def generate_video(
     quality=5,
     scheduler="UniPCMultistep",
     flow_shift=6.0,
-    frame_multiplier=
+    frame_multiplier=16,
     video_component=True,
     progress=gr.Progress(track_tqdm=True),
 ):
@@ -543,7 +557,7 @@ CSS = """
 
 
 with gr.Blocks(delete_cache=(3600, 10800)) as demo:
-    gr.Markdown("## WAMU - Wan 2.2 I2V (14B)")
+    gr.Markdown("## WAMU - Wan 2.2 I2V (14B) 🐢")
     gr.Markdown("#### ℹ️ **A Note on Performance:** This version prioritizes a straightforward setup over maximum speed, so performance may vary.")
     gr.Markdown("Run Wan 2.2 in just 4-8 steps, fp8 quantization & AoT compilation - compatible with 🧨 diffusers and ZeroGPU")
 
@@ -552,19 +566,19 @@ with gr.Blocks(delete_cache=(3600, 10800)) as demo:
         input_image_component = gr.Image(type="pil", label="Input Image", sources=["upload", "clipboard"])
         prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
         duration_seconds_input = gr.Slider(minimum=MIN_DURATION, maximum=MAX_DURATION, step=0.1, value=3.5, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
-        steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Inference Steps")
         frame_multi = gr.Dropdown(
-            choices=[
-            value=
-            label="
-            info="
+            choices=[FIXED_FPS, FIXED_FPS*2, FIXED_FPS*4, FIXED_FPS*8],
+            value=FIXED_FPS,
+            label="Video Fluidity (Frames per Second)",
+            info="Extra frames will be generated using flow estimation, which estimates motion between frames to make the video smoother."
         )
         with gr.Accordion("Advanced Settings", open=False):
-            last_image_component = gr.Image(type="pil", label="Last Image (Optional)")
+            last_image_component = gr.Image(type="pil", label="Last Image (Optional)", sources=["upload", "clipboard"])
             negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, info="Used if any Guidance Scale > 1.", lines=3)
             quality_slider = gr.Slider(minimum=1, maximum=10, step=1, value=6, label="Video Quality", info="If set to 10, the generated video may be too large and won't play in the Gradio preview.")
             seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
             randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
+            steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Inference Steps")
             guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale - high noise stage", info="Values above 1 increase GPU usage and may take longer to process.")
             guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale 2 - low noise stage")
             scheduler_dropdown = gr.Dropdown(
@@ -576,7 +590,7 @@ with gr.Blocks(delete_cache=(3600, 10800)) as demo:
             flow_shift_slider = gr.Slider(minimum=0.5, maximum=15.0, step=0.1, value=3.0, label="Flow Shift")
             play_result_video = gr.Checkbox(label="Display result", value=True, interactive=True)
             org_name = "TestOrganizationPleaseIgnore"
-            gr.Markdown(f"[ZeroGPU
+            gr.Markdown(f"[ZeroGPU help, tips and troubleshooting](https://huggingface.co/datasets/{org_name}/help/blob/main/gpu_help.md)")
 
         generate_button = gr.Button("Generate Video", variant="primary")
 
@@ -626,4 +640,5 @@ if __name__ == "__main__":
    demo.queue().launch(
        mcp_server=True,
        css=CSS,
+       show_error=True,
    )