Spaces:

GeradeHouse
/

Wan2.1-FLF2V

Paused

App Files Community

GeradeHouse committed on Apr 25

Commit

c83344b

verified ·

1 Parent(s): 29a7230

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -15

app.py CHANGED Viewed

@@ -4,8 +4,6 @@ Gradio demo for Wan2.1 First-Last-Frame-to-Video (FLF2V)
 Author: <your-handle>
 """
-import os
-import tempfile
 import numpy as np
 import torch
 import gradio as gr
@@ -19,12 +17,12 @@ import torchvision.transforms.functional as TF
 # CONFIG ----------------------------------------------------------------
 MODEL_ID = "Wan-AI/Wan2.1-FLF2V-14B-720P-diffusers"  # switch to 1.3B if needed
 DTYPE = torch.float16                                 # or torch.bfloat16 on AMP-friendly GPUs
-MAX_AREA = 1280 * 720                                 # keep ≤ 720 p
-DEFAULT_FRAMES = 81                                   # ≈ 5 s at 16 fps
 # ----------------------------------------------------------------------
 def load_pipeline():
-    """Lazy-load the huge model once per process."""
     # image encoder in full precision
     image_encoder = CLIPVisionModel.from_pretrained(
         MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32
@@ -41,8 +39,8 @@ def load_pipeline():
     )
     # memory helpers for ≤ 24 GB cards / HF T4-medium
-    pipe.enable_model_cpu_offload()       # paged UNet blocks
-    pipe.vae.enable_slicing()             # reduce VAE peak RAM
     # Optional: if you have xformers installed
     # pipe.enable_xformers_memory_efficient_attention()
@@ -53,7 +51,7 @@ PIPE = load_pipeline()
 # ----------------------------------------------------------------------
 # UTILS ----------------------------------------------------------------
 def aspect_resize(img: Image.Image, max_area=MAX_AREA):
-    """Resize while respecting model patch size (multiple of 8*transformer patch)."""
     ar = img.height / img.width
     mod = PIPE.vae_scale_factor_spatial * PIPE.transformer.config.patch_size[1]
     h = round(np.sqrt(max_area * ar)) // mod * mod
@@ -61,6 +59,7 @@ def aspect_resize(img: Image.Image, max_area=MAX_AREA):
     return img.resize((w, h), Image.LANCZOS), h, w
 def center_crop_resize(img: Image.Image, h, w):
     ratio = max(w / img.width, h / img.height)
     img = img.resize(
         (round(img.width * ratio), round(img.height * ratio)), Image.LANCZOS
@@ -76,13 +75,13 @@ def generate(first_frame, last_frame, prompt, negative_prompt, steps,
         seed = torch.seed()
     generator = torch.Generator(device=PIPE.device).manual_seed(seed)
-    # preprocess
     first_frame, h, w = aspect_resize(first_frame)
     if last_frame.size != first_frame.size:
         last_frame = center_crop_resize(last_frame, h, w)
-    # run pipeline
-    result = PIPE(
         image=first_frame,
         last_image=last_frame,
         prompt=prompt,
@@ -94,9 +93,9 @@ def generate(first_frame, last_frame, prompt, negative_prompt, steps,
         guidance_scale=guidance,
         generator=generator,
     )
-    frames = result.frames[0]  # list of PIL images
-    # export
     video_path = export_to_video(frames, fps=fps)
     return video_path, seed
@@ -109,8 +108,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         first_img = gr.Image(label="First frame", type="pil")
         last_img  = gr.Image(label="Last frame",  type="pil")
-    prompt         = gr.Textbox(label="Prompt", placeholder="A blue bird takes off…")
-    negative       = gr.Textbox(label="Negative prompt (optional)", placeholder="ugly, blurry")
     with gr.Accordion("Advanced parameters", open=False):
         steps      = gr.Slider(10, 50, value=30, step=1, label="Sampling steps")
         guidance   = gr.Slider(0.0, 10.0, value=5.5, step=0.1, label="Guidance scale")

 Author: <your-handle>
 """
 import numpy as np
 import torch
 import gradio as gr
 # CONFIG ----------------------------------------------------------------
 MODEL_ID = "Wan-AI/Wan2.1-FLF2V-14B-720P-diffusers"  # switch to 1.3B if needed
 DTYPE = torch.float16                                 # or torch.bfloat16 on AMP-friendly GPUs
+MAX_AREA = 1280 * 720                                 # keep ≤ 720p
+DEFAULT_FRAMES = 81                                   # ≈ 5s at 16 fps
 # ----------------------------------------------------------------------
 def load_pipeline():
+    """Lazy‐load the huge model once per process."""
     # image encoder in full precision
     image_encoder = CLIPVisionModel.from_pretrained(
         MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32
     )
     # memory helpers for ≤ 24 GB cards / HF T4-medium
+    pipe.enable_model_cpu_offload()    # page UNet blocks off GPU
+    pipe.vae.enable_slicing()          # reduce VAE peak RAM
     # Optional: if you have xformers installed
     # pipe.enable_xformers_memory_efficient_attention()
 # ----------------------------------------------------------------------
 # UTILS ----------------------------------------------------------------
 def aspect_resize(img: Image.Image, max_area=MAX_AREA):
+    """Resize while respecting model patch size (multiple of transformer patch)."""
     ar = img.height / img.width
     mod = PIPE.vae_scale_factor_spatial * PIPE.transformer.config.patch_size[1]
     h = round(np.sqrt(max_area * ar)) // mod * mod
     return img.resize((w, h), Image.LANCZOS), h, w
 def center_crop_resize(img: Image.Image, h, w):
+    """Center‐crop & resize to target H×W."""
     ratio = max(w / img.width, h / img.height)
     img = img.resize(
         (round(img.width * ratio), round(img.height * ratio)), Image.LANCZOS
         seed = torch.seed()
     generator = torch.Generator(device=PIPE.device).manual_seed(seed)
+    # preprocess inputs
     first_frame, h, w = aspect_resize(first_frame)
     if last_frame.size != first_frame.size:
         last_frame = center_crop_resize(last_frame, h, w)
+    # run the pipeline
+    output = PIPE(
         image=first_frame,
         last_image=last_frame,
         prompt=prompt,
         guidance_scale=guidance,
         generator=generator,
     )
+    frames = output.frames[0]  # list[PIL.Image]
+    # export to .mp4
     video_path = export_to_video(frames, fps=fps)
     return video_path, seed
         first_img = gr.Image(label="First frame", type="pil")
         last_img  = gr.Image(label="Last frame",  type="pil")
+    prompt   = gr.Textbox(label="Prompt", placeholder="A blue bird takes off…")
+    negative = gr.Textbox(label="Negative prompt (optional)", placeholder="ugly, blurry")
     with gr.Accordion("Advanced parameters", open=False):
         steps      = gr.Slider(10, 50, value=30, step=1, label="Sampling steps")
         guidance   = gr.Slider(0.0, 10.0, value=5.5, step=0.1, label="Guidance scale")