Spaces:

GeradeHouse
/

Wan2.1-FLF2V

Paused

App Files Files Community

GeradeHouse committed on Apr 25

Commit

5516eb1

verified ·

1 Parent(s): 4e367ef

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -39

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
 #!/usr/bin/env python
 """
 Gradio demo for Wan2.1 First-Last-Frame-to-Video (FLF2V)
-Uses Accelerate’s balanced device mapping for optimal CPU/GPU placement.
 Author: <your-handle>
 """
@@ -16,37 +17,32 @@ import torchvision.transforms.functional as TF
 # ---------------------------------------------------------------------
 # CONFIG ----------------------------------------------------------------
-MODEL_ID       = "Wan-AI/Wan2.1-FLF2V-14B-720P-diffusers"  # or switch to 1.3B
-DTYPE          = torch.float16                            # or torch.bfloat16
-MAX_AREA       = 1280 * 720                                # ≤720p
-DEFAULT_FRAMES = 81                                        # ~5s @16fps
 # ----------------------------------------------------------------------
 def load_pipeline():
-    """Load & auto-map the pipeline across CPU/GPU with low CPU memory usage."""
-    # 1) load vision encoder (full precision)
     image_encoder = CLIPVisionModel.from_pretrained(
         MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32
     )
-    # 2) load VAE (half precision)
     vae = AutoencoderKLWan.from_pretrained(
         MODEL_ID, subfolder="vae", torch_dtype=DTYPE
     )
-    # 3) load the video pipeline with Accelerate helpers
     pipe = WanImageToVideoPipeline.from_pretrained(
         MODEL_ID,
         vae=vae,
         image_encoder=image_encoder,
         torch_dtype=DTYPE,
-        low_cpu_mem_usage=True,   # lazy-load weights into CPU RAM
-        device_map="balanced",    # balanced CPU/GPU sharding
     )
-    # 4) use the fast Rust-backed processor
     pipe.image_processor = CLIPImageProcessor.from_pretrained(
         MODEL_ID, subfolder="image_processor", use_fast=True
     )
     return pipe
 PIPE = load_pipeline()
@@ -54,7 +50,6 @@ PIPE = load_pipeline()
 # ----------------------------------------------------------------------
 # UTILS ----------------------------------------------------------------
 def aspect_resize(img: Image.Image, max_area=MAX_AREA):
-    """Resize while keeping aspect and patch-size multiples."""
     ar = img.height / img.width
     mod = PIPE.vae_scale_factor_spatial * PIPE.transformer.config.patch_size[1]
     h = round(np.sqrt(max_area * ar)) // mod * mod
@@ -62,29 +57,25 @@ def aspect_resize(img: Image.Image, max_area=MAX_AREA):
     return img.resize((w, h), Image.LANCZOS), h, w
 def center_crop_resize(img: Image.Image, h, w):
-    """Center-crop & resize to target H×W."""
     ratio = max(w / img.width, h / img.height)
-    img = img.resize(
-        (round(img.width * ratio), round(img.height * ratio)), Image.LANCZOS
-    )
     return TF.center_crop(img, [h, w])
 # ----------------------------------------------------------------------
-# GENERATE --------------------------------------------------------------
-def generate(first_frame, last_frame, prompt, negative_prompt, steps,
-             guidance, num_frames, seed, fps):
-    # handle seed
-    if seed == -1:
-        seed = torch.seed()
-    gen = torch.Generator(device=PIPE.device).manual_seed(seed)
-    # preprocess frames
     first_frame, h, w = aspect_resize(first_frame)
     if last_frame.size != first_frame.size:
         last_frame = center_crop_resize(last_frame, h, w)
-    # inference
     output = PIPE(
         image=first_frame,
         last_image=last_frame,
@@ -97,20 +88,42 @@ def generate(first_frame, last_frame, prompt, negative_prompt, steps,
         guidance_scale=guidance,
         generator=gen,
     )
-    frames = output.frames[0]  # list[PIL.Image]
-    # export to mp4
     video_path = export_to_video(frames, fps=fps)
-    return video_path, seed
 # ----------------------------------------------------------------------
 # UI --------------------------------------------------------------------
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## Wan 2.1 FLF2V �� First & Last Frame → Video")
     with gr.Row():
         first_img = gr.Image(label="First frame", type="pil")
-        last_img  = gr.Image(label="Last frame", type="pil")
     prompt   = gr.Textbox(label="Prompt", placeholder="A blue bird takes off…")
     negative = gr.Textbox(label="Negative prompt (optional)", placeholder="ugly, blurry")
@@ -123,14 +136,16 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         seed       = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
     run_btn   = gr.Button("Generate")
-    video     = gr.Video(label="Result (.mp4)")
     used_seed = gr.Number(label="Seed used", interactive=False)
     run_btn.click(
         fn=generate,
         inputs=[first_img, last_img, prompt, negative, steps, guidance, num_frames, seed, fps],
-        outputs=[video, used_seed]
     )
-if __name__ == "__main__":
     demo.launch()

 #!/usr/bin/env python
 """
 Gradio demo for Wan2.1 First-Last-Frame-to-Video (FLF2V)
+– shows streaming status updates
+– auto-downloads the generated video
 Author: <your-handle>
 """
 # ---------------------------------------------------------------------
 # CONFIG ----------------------------------------------------------------
+MODEL_ID       = "Wan-AI/Wan2.1-FLF2V-14B-720P-diffusers"
+DTYPE          = torch.float16
+MAX_AREA       = 1280 * 720
+DEFAULT_FRAMES = 81
 # ----------------------------------------------------------------------
 def load_pipeline():
+    """Load & shard the pipeline across CPU/GPU with Accelerate."""
     image_encoder = CLIPVisionModel.from_pretrained(
         MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32
     )
     vae = AutoencoderKLWan.from_pretrained(
         MODEL_ID, subfolder="vae", torch_dtype=DTYPE
     )
     pipe = WanImageToVideoPipeline.from_pretrained(
         MODEL_ID,
         vae=vae,
         image_encoder=image_encoder,
         torch_dtype=DTYPE,
+        low_cpu_mem_usage=True,   # lazy‐load to CPU RAM
+        device_map="balanced",    # shard across CPU/GPU
     )
+    # switch to the fast Rust processor
     pipe.image_processor = CLIPImageProcessor.from_pretrained(
         MODEL_ID, subfolder="image_processor", use_fast=True
     )
     return pipe
 PIPE = load_pipeline()
 # ----------------------------------------------------------------------
 # UTILS ----------------------------------------------------------------
 def aspect_resize(img: Image.Image, max_area=MAX_AREA):
     ar = img.height / img.width
     mod = PIPE.vae_scale_factor_spatial * PIPE.transformer.config.patch_size[1]
     h = round(np.sqrt(max_area * ar)) // mod * mod
     return img.resize((w, h), Image.LANCZOS), h, w
 def center_crop_resize(img: Image.Image, h, w):
     ratio = max(w / img.width, h / img.height)
+    img = img.resize((round(img.width * ratio), round(img.height * ratio)), Image.LANCZOS)
     return TF.center_crop(img, [h, w])
 # ----------------------------------------------------------------------
+# GENERATE (streaming) --------------------------------------------------
+def generate(first_frame, last_frame, prompt, negative_prompt,
+             steps, guidance, num_frames, seed, fps):
+    # 1) Preprocess
+    yield None, None, "Preprocessing images..."
     first_frame, h, w = aspect_resize(first_frame)
     if last_frame.size != first_frame.size:
         last_frame = center_crop_resize(last_frame, h, w)
+    # 2) Inference
+    yield None, None, f"Running inference ({steps} steps)..."
+    if seed == -1:
+        seed = torch.seed()
+    gen = torch.Generator(device=PIPE.device).manual_seed(seed)
     output = PIPE(
         image=first_frame,
         last_image=last_frame,
         guidance_scale=guidance,
         generator=gen,
     )
+    frames = output.frames[0]
+    # 3) Export
+    yield None, None, "Exporting video..."
     video_path = export_to_video(frames, fps=fps)
+    # 4) Done
+    yield video_path, seed, "Done! Your browser will download the video."
 # ----------------------------------------------------------------------
 # UI --------------------------------------------------------------------
+with gr.Blocks() as demo:
+    # inject JS for auto-download
+    gr.HTML("""
+    <script>
+    function downloadVideo() {
+      const container = document.getElementById('output_video');
+      if (!container) return;
+      const vid = container.querySelector('video');
+      if (!vid) return;
+      const src = vid.currentSrc;
+      const a = document.createElement('a');
+      a.href = src;
+      a.download = 'output.mp4';
+      document.body.appendChild(a);
+      a.click();
+      document.body.removeChild(a);
+    }
+    </script>
+    """)
+    gr.Markdown("## Wan 2.1 FLF2V – Streaming progress + auto-download")
     with gr.Row():
         first_img = gr.Image(label="First frame", type="pil")
+        last_img  = gr.Image(label="Last frame",  type="pil")
     prompt   = gr.Textbox(label="Prompt", placeholder="A blue bird takes off…")
     negative = gr.Textbox(label="Negative prompt (optional)", placeholder="ugly, blurry")
         seed       = gr.Number(value=-1, precision=0, label="Seed (-1 = random)")
     run_btn   = gr.Button("Generate")
+    status    = gr.Textbox(label="Status", interactive=False)
+    video     = gr.Video(label="Result", elem_id="output_video")
     used_seed = gr.Number(label="Seed used", interactive=False)
     run_btn.click(
         fn=generate,
         inputs=[first_img, last_img, prompt, negative, steps, guidance, num_frames, seed, fps],
+        outputs=[video, used_seed, status],
+        _js="downloadVideo"
     )
+    demo.queue()
     demo.launch()