GeradeHouse committed (verified)
Commit 9c8f4c5 · 1 Parent(s): 5516eb1

Update app.py

Files changed (1)
  1. app.py +53 -57
app.py CHANGED
@@ -1,8 +1,6 @@
 #!/usr/bin/env python
 """
-Gradio demo for Wan2.1 First-Last-Frame-to-Video (FLF2V)
-  – shows streaming status updates
-  – auto-downloads the generated video
+Gradio demo for Wan2.1 FLF2V – full streaming progress
 Author: <your-handle>
 """

@@ -23,29 +21,34 @@ MAX_AREA = 1280 * 720
 DEFAULT_FRAMES = 81
 # ----------------------------------------------------------------------

-def load_pipeline():
-    """Load & shard the pipeline across CPU/GPU with Accelerate."""
+def load_pipeline(progress):
+    """Load model components with progress updates."""
+    # 0% → 5%: start loading
+    progress(0.0, desc="Initializing model load…")
     image_encoder = CLIPVisionModel.from_pretrained(
         MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32
     )
+    progress(0.02, desc="Image encoder loaded")
     vae = AutoencoderKLWan.from_pretrained(
         MODEL_ID, subfolder="vae", torch_dtype=DTYPE
     )
+    progress(0.04, desc="VAE loaded")
     pipe = WanImageToVideoPipeline.from_pretrained(
         MODEL_ID,
         vae=vae,
         image_encoder=image_encoder,
         torch_dtype=DTYPE,
-        low_cpu_mem_usage=True,    # lazy-load to CPU RAM
-        device_map="balanced",     # shard across CPU/GPU
+        low_cpu_mem_usage=True,
+        device_map="balanced",
     )
-    # switch to the fast Rust processor
+    progress(0.06, desc="Pipeline assembled")
     pipe.image_processor = CLIPImageProcessor.from_pretrained(
         MODEL_ID, subfolder="image_processor", use_fast=True
     )
+    progress(0.08, desc="Processor ready")
     return pipe

-PIPE = load_pipeline()
+# Preload nothing here—model loads in-function to stream progress.

 # ----------------------------------------------------------------------
 # UTILS ----------------------------------------------------------------
@@ -62,90 +65,83 @@ def center_crop_resize(img: Image.Image, h, w):
     return TF.center_crop(img, [h, w])

 # ----------------------------------------------------------------------
-# GENERATE (streaming) --------------------------------------------------
+# GENERATE --------------------------------------------------------------
 def generate(first_frame, last_frame, prompt, negative_prompt,
-             steps, guidance, num_frames, seed, fps):
-    # 1) Preprocess
-    yield None, None, "Preprocessing images..."
+             steps, guidance, num_frames, seed, fps,
+             progress=gr.Progress()):      # inject Gradio progress tracker
+
+    # 1) Load the pipeline with streaming
+    pipe = load_pipeline(progress)
+
+    # 2) Preprocess images
+    progress(0.10, desc="Preprocessing frames…")
     first_frame, h, w = aspect_resize(first_frame)
     if last_frame.size != first_frame.size:
         last_frame = center_crop_resize(last_frame, h, w)
+    progress(0.12, desc="Frames ready")

-    # 2) Inference
-    yield None, None, f"Running inference ({steps} steps)..."
+    # 3) Inference with per-step updates
     if seed == -1:
         seed = torch.seed()
-    gen = torch.Generator(device=PIPE.device).manual_seed(seed)
-    output = PIPE(
+    gen = torch.Generator(device=pipe.device).manual_seed(seed)
+
+    def _callback(step, timestep, latents):
+        # Map step to [0.12…0.90] fraction of bar
+        frac = 0.12 + 0.78 * (step + 1) / steps
+        progress(frac, desc=f"Inference: step {step+1}/{steps}")
+
+    progress(0.12, desc="Starting inference…")
+    output = pipe(
         image=first_frame,
         last_image=last_frame,
         prompt=prompt,
         negative_prompt=negative_prompt or None,
-        height=h,
-        width=w,
+        height=h, width=w,
         num_frames=num_frames,
         num_inference_steps=steps,
         guidance_scale=guidance,
         generator=gen,
+        callback_on_step_end=_callback,
+        callback_steps=1,      # call our callback every step
     )
     frames = output.frames[0]

-    # 3) Export
-    yield None, None, "Exporting video..."
+    # 4) Export
+    progress(0.92, desc="Building video")
     video_path = export_to_video(frames, fps=fps)

-    # 4) Done
-    yield video_path, seed, "Done! Your browser will download the video."
+    # 5) Complete!
+    progress(1.0, desc="Done!")
+    return video_path

 # ----------------------------------------------------------------------
 # UI --------------------------------------------------------------------
 with gr.Blocks() as demo:
-    # inject JS for auto-download
-    gr.HTML("""
-    <script>
-    function downloadVideo() {
-      const container = document.getElementById('output_video');
-      if (!container) return;
-      const vid = container.querySelector('video');
-      if (!vid) return;
-      const src = vid.currentSrc;
-      const a = document.createElement('a');
-      a.href = src;
-      a.download = 'output.mp4';
-      document.body.appendChild(a);
-      a.click();
-      document.body.removeChild(a);
-    }
-    </script>
-    """)
-
-    gr.Markdown("## Wan 2.1 FLF2V – Streaming progress + auto-download")
+    gr.Markdown("## Wan2.1 FLF2V Full Streaming Progress")

     with gr.Row():
         first_img = gr.Image(label="First frame", type="pil")
         last_img = gr.Image(label="Last frame", type="pil")

-    prompt = gr.Textbox(label="Prompt", placeholder="A blue bird takes off…")
-    negative = gr.Textbox(label="Negative prompt (optional)", placeholder="ugly, blurry")
+    prompt = gr.Textbox(label="Prompt")
+    negative = gr.Textbox(label="Negative prompt (optional)")

     with gr.Accordion("Advanced parameters", open=False):
-        steps = gr.Slider(10, 50, value=30, step=1, label="Sampling steps")
-        guidance = gr.Slider(0.0, 10.0, value=5.5, step=0.1, label="Guidance scale")
-        num_frames = gr.Slider(16, 129, value=DEFAULT_FRAMES, step=1, label="Frames")
-        fps = gr.Slider(4, 30, value=16, step=1, label="FPS (export)")
-        seed = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
+        steps = gr.Slider(10, 50, value=30, step=1, label="Steps")
+        guidance = gr.Slider(0.0, 10.0, value=5.5, step=0.1, label="Guidance")
+        num_frames = gr.Slider(16, 129, value=DEFAULT_FRAMES, label="Frames")
+        fps = gr.Slider(4, 30, value=16, label="FPS")
+        seed = gr.Number(value=-1, precision=0, label="Seed")

-    run_btn = gr.Button("Generate")
-    status = gr.Textbox(label="Status", interactive=False)
-    video = gr.Video(label="Result", elem_id="output_video")
-    used_seed = gr.Number(label="Seed used", interactive=False)
+    video = gr.Video(label="Result (.mp4)")

+    # bind generator to button; progress bar overlays on the video output
+    run_btn = gr.Button("Generate")
     run_btn.click(
         fn=generate,
         inputs=[first_img, last_img, prompt, negative, steps, guidance, num_frames, seed, fps],
-        outputs=[video, used_seed, status],
-        _js="downloadVideo"
+        outputs=[video],
     )

-demo.queue()
+demo.queue()      # enable progress tracking
 demo.launch()
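
Note on the per-step callback: recent diffusers releases expect `callback_on_step_end` to receive `(pipeline, step_index, timestep, callback_kwargs)` and to return the `callback_kwargs` dict, while the three-argument `(step, timestep, latents)` form used in this commit matches the older, deprecated `callback=` / `callback_steps=` interface. Below is a minimal sketch, not part of the commit, of how the progress wiring could look under the newer convention; `pipe`, the step-to-fraction mapping, and the function name are illustrative assumptions.

# Sketch only: assumes Gradio's gr.Progress injection and the newer diffusers
# callback_on_step_end convention. `pipe` stands in for an already loaded pipeline.
import gradio as gr

def generate_with_progress(prompt, steps, progress=gr.Progress()):
    progress(0.0, desc="Starting inference…")

    def on_step_end(pipeline, step_index, timestep, callback_kwargs):
        # Map denoising steps onto the 0.1 to 0.9 slice of the progress bar.
        progress(0.1 + 0.8 * (step_index + 1) / steps,
                 desc=f"Step {step_index + 1}/{steps}")
        return callback_kwargs      # the newer API requires returning the kwargs dict

    output = pipe(
        prompt=prompt,
        num_inference_steps=steps,
        callback_on_step_end=on_step_end,   # no callback_steps argument needed here
    )
    progress(1.0, desc="Done")
    return output.frames[0]

If the Space pins an older diffusers version that still accepts `callback=` and `callback_steps=`, the committed `_callback(step, timestep, latents)` signature is the matching one; on newer versions the pipeline call may reject `callback_steps` as an unexpected argument, so the callback wiring should be checked against the pinned release.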