Update app.py
app.py
CHANGED
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 """
 Gradio demo for Wan2.1 FLF2V – full streaming progress
+No globals: pipeline, resize utils all use the local `pipe`.
 Author: <your-handle>
 """
 
@@ -22,17 +23,20 @@ DEFAULT_FRAMES = 81
 # ----------------------------------------------------------------------
 
 def load_pipeline(progress):
-    """Load
-
-    progress(0.0, desc="Initializing model load…")
+    """Load & shard the pipeline across CPU/GPU with streaming progress."""
+    progress(0.00, desc="Init: loading image encoder…")
     image_encoder = CLIPVisionModel.from_pretrained(
         MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32
     )
-    progress(0.
+    progress(0.10, desc="Loaded image encoder")
+
+    progress(0.10, desc="Loading VAE…")
     vae = AutoencoderKLWan.from_pretrained(
         MODEL_ID, subfolder="vae", torch_dtype=DTYPE
     )
-    progress(0.
+    progress(0.20, desc="Loaded VAE")
+
+    progress(0.20, desc="Assembling pipeline…")
     pipe = WanImageToVideoPipeline.from_pretrained(
         MODEL_ID,
         vae=vae,
@@ -41,81 +45,82 @@ def load_pipeline(progress):
         low_cpu_mem_usage=True,
         device_map="balanced",
     )
-    progress(0.
+    progress(0.30, desc="Pipeline assembled")
+
+    progress(0.30, desc="Loading fast image processor…")
     pipe.image_processor = CLIPImageProcessor.from_pretrained(
         MODEL_ID, subfolder="image_processor", use_fast=True
     )
-    progress(0.
-    return pipe
+    progress(0.40, desc="Processor ready")
 
-
+    return pipe
 
-
-
-def aspect_resize(img: Image.Image, max_area=MAX_AREA):
+def aspect_resize(img: Image.Image, pipe, max_area=MAX_AREA):
+    """Resize while respecting model patch multiples, using `pipe` for scale."""
     ar = img.height / img.width
-    mod =
+    mod = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size[1]
     h = round(np.sqrt(max_area * ar)) // mod * mod
     w = round(np.sqrt(max_area / ar)) // mod * mod
     return img.resize((w, h), Image.LANCZOS), h, w
 
-def center_crop_resize(img: Image.Image, h, w):
+def center_crop_resize(img: Image.Image, pipe, h, w):
+    """Center-crop & resize to H×W, using same Lanczos filter."""
     ratio = max(w / img.width, h / img.height)
-    img = img.resize(
+    img = img.resize(
+        (round(img.width * ratio), round(img.height * ratio)),
+        Image.LANCZOS
+    )
     return TF.center_crop(img, [h, w])
 
-# ----------------------------------------------------------------------
-# GENERATE --------------------------------------------------------------
 def generate(first_frame, last_frame, prompt, negative_prompt,
              steps, guidance, num_frames, seed, fps,
-             progress=gr.Progress()): #
+             progress=gr.Progress()): # Gradio progress hook
 
-    # 1) Load
+    # 1) Load & shard pipeline
     pipe = load_pipeline(progress)
 
-    # 2) Preprocess
-    progress(0.
-    first_frame, h, w = aspect_resize(first_frame)
+    # 2) Preprocess
+    progress(0.45, desc="Preprocessing first frame…")
+    first_frame, h, w = aspect_resize(first_frame, pipe)
     if last_frame.size != first_frame.size:
-
-
+        progress(0.50, desc="Preprocessing last frame…")
+        last_frame = center_crop_resize(last_frame, pipe, h, w)
+    progress(0.55, desc="Frames ready")
 
-    # 3)
+    # 3) Run inference with per-step callbacks
    if seed == -1:
         seed = torch.seed()
     gen = torch.Generator(device=pipe.device).manual_seed(seed)
 
-    def
-
-        frac =
-        progress(frac, desc=f"Inference: step {step+1}/{steps}")
+    def _cb(step, timestep, latents):
+        frac = 0.55 + 0.35 * ((step + 1) / steps)
+        progress(frac, desc=f"Inference step {step+1}/{steps}")
 
-    progress(0.
+    progress(0.55, desc="Starting inference…")
     output = pipe(
         image=first_frame,
         last_image=last_frame,
         prompt=prompt,
         negative_prompt=negative_prompt or None,
-        height=h,
+        height=h,
+        width=w,
         num_frames=num_frames,
         num_inference_steps=steps,
         guidance_scale=guidance,
         generator=gen,
-        callback_on_step_end=
-        callback_steps=1,
+        callback_on_step_end=_cb,
+        callback_steps=1,
     )
     frames = output.frames[0]
 
-    # 4) Export
-    progress(0.92, desc="
+    # 4) Export video
+    progress(0.92, desc="Exporting video…")
     video_path = export_to_video(frames, fps=fps)
 
-    # 5)
-    progress(1.0, desc="
+    # 5) Done
+    progress(1.0, desc="Complete!")
     return video_path
 
-# ----------------------------------------------------------------------
-# UI --------------------------------------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("## Wan2.1 FLF2V – Full Streaming Progress")
 
@@ -123,8 +128,8 @@ with gr.Blocks() as demo:
     first_img = gr.Image(label="First frame", type="pil")
     last_img = gr.Image(label="Last frame", type="pil")
 
-    prompt = gr.Textbox(label="Prompt")
-    negative = gr.Textbox(label="Negative prompt (optional)")
+    prompt = gr.Textbox(label="Prompt", placeholder="A blue bird takes off…")
+    negative = gr.Textbox(label="Negative prompt (optional)", placeholder="ugly, blurry")
 
     with gr.Accordion("Advanced parameters", open=False):
         steps = gr.Slider(10, 50, value=30, step=1, label="Steps")
@@ -135,13 +140,12 @@ with gr.Blocks() as demo:
 
     video = gr.Video(label="Result (.mp4)")
 
-
-
-    run_btn.click(
+    btn = gr.Button("Generate")
+    btn.click(
         fn=generate,
         inputs=[first_img, last_img, prompt, negative, steps, guidance, num_frames, seed, fps],
         outputs=[video],
     )
 
-demo.queue() # enable
+demo.queue() # enable streaming updates
 demo.launch()
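
For intuition, the sizing rule that the updated `aspect_resize` applies can be checked in isolation. Below is a minimal sketch, assuming an illustrative `mod` of 16 and a 1280×720 `max_area`; in the app these values come from the pipeline config and `MAX_AREA`, so treat both as assumptions.

    import numpy as np

    def aligned_size(height, width, max_area=1280 * 720, mod=16):
        # Keep the input aspect ratio, cap the area at max_area, and round
        # both sides down to a multiple of `mod` (a stand-in for
        # vae_scale_factor_spatial * patch_size; 16 is an assumed value).
        ar = height / width
        h = int(round(np.sqrt(max_area * ar)) // mod * mod)
        w = int(round(np.sqrt(max_area / ar)) // mod * mod)
        return h, w

    print(aligned_size(1080, 1920))  # -> (720, 1280) for a 16:9 input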