Update app.py
app.py CHANGED

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 """
 Gradio demo for Wan2.1 FLF2V – First & Last Frame → Video
-Streams all HF-Hub & Diffusers tqdm bars, caches the model,
+Streams all HF-Hub & Diffusers tqdm bars, caches the model,
 and provides a direct download link for the MP4.
 """
 
@@ -10,7 +10,7 @@ import numpy as np
 import torch
 import gradio as gr
 from PIL import Image
-from transformers import CLIPVisionModel,
+from transformers import CLIPVisionModel, CLIPProcessor
 from diffusers import WanImageToVideoPipeline, AutoencoderKLWan
 from diffusers.utils import export_to_video
 import torchvision.transforms.functional as TF
@@ -34,15 +34,15 @@ def load_pipeline():
     vision = CLIPVisionModel.from_pretrained(
         MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32
     )
-    # 2) fast
-    processor =
+    # 2) unified CLIP processor (fast Rust-backed+tokenizer stub)
+    processor = CLIPProcessor.from_pretrained(
         MODEL_ID, subfolder="image_processor", use_fast=True
     )
     # 3) VAE (half precision)
     vae = AutoencoderKLWan.from_pretrained(
         MODEL_ID, subfolder="vae", torch_dtype=DTYPE
     )
-    # 4) pipeline
+    # 4) assemble pipeline
     pipe = WanImageToVideoPipeline.from_pretrained(
         MODEL_ID,
         vae=vae,
@@ -52,6 +52,7 @@ def load_pipeline():
     )
     # 5) CPU offload for large models
     pipe.enable_model_cpu_offload()
+    # return on correct device
     return pipe.to("cuda" if torch.cuda.is_available() else "cpu")
 
 # -----------------------------------------------------------------------------
@@ -70,7 +71,7 @@ def center_crop_resize(img: Image.Image, h: int, w: int):
     return TF.center_crop(img2, [h, w])
 
 # -----------------------------------------------------------------------------
-# GENERATION (
+# GENERATION (streams all tqdm → Gradio)
 # -----------------------------------------------------------------------------
 def generate(
     first_frame: Image.Image,
@@ -85,23 +86,23 @@ def generate(
     progress=gr.Progress(track_tqdm=True),
 ):
     global PIPE
-    # lazy load
+    # lazy load once
     if PIPE is None:
         progress(0, desc="Loading model…")
         PIPE = load_pipeline()
 
-    #
+    # ensure reproducibility
     if seed == -1:
         seed = torch.seed()
     gen = torch.Generator(device=PIPE.device).manual_seed(seed)
 
     # preprocess
-    progress(0, desc="Preprocessing…")
+    progress(0, desc="Preprocessing frames…")
     frame1, h, w = aspect_resize(first_frame)
     if last_frame.size != frame1.size:
         last_frame = center_crop_resize(last_frame, h, w)
 
-    # inference (all tqdm bars
+    # inference (all internal tqdm bars streamed)
     result = PIPE(
         image=frame1,
         last_image=last_frame,
@@ -116,7 +117,7 @@ def generate(
     )
     frames = result.frames[0]
 
-    # export
+    # export to MP4
     progress(1.0, desc="Exporting video…")
     out_path = export_to_video(frames, fps=fps)
     return out_path, seed
@@ -135,11 +136,11 @@ with gr.Blocks() as demo:
     negative = gr.Textbox(label="Negative prompt (optional)")
 
     with gr.Accordion("Advanced parameters", open=False):
-        steps = gr.Slider(10, 50, value=30,
+        steps = gr.Slider(10, 50, value=30, step=1, label="Steps")
         guidance = gr.Slider(0.0, 10.0, value=5.5, step=0.1, label="Guidance")
         num_frames = gr.Slider(16, 129, value=DEFAULT_FRAMES, step=1, label="Frames")
-        fps = gr.Slider(4, 30, value=16,
-        seed = gr.Number(value=-1, precision=0,
+        fps = gr.Slider(4, 30, value=16, step=1, label="FPS")
+        seed = gr.Number(value=-1, precision=0, label="Seed")
 
     run_btn = gr.Button("Generate")
     download = gr.File(label="Download video (.mp4)")
@@ -152,5 +153,5 @@ with gr.Blocks() as demo:
         concurrency_limit=1
     )
 
-# enable
+# enable queue + tqdm streaming
 demo.queue().launch()
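
The hunk covering old lines 46-52 elides the remaining keyword arguments of the WanImageToVideoPipeline.from_pretrained(...) call, so the full wiring is not visible in this diff. Below is a minimal sketch of how the completed loader plausibly fits together, assuming the elided arguments hand the vision encoder and processor to the pipeline and that MODEL_ID and DTYPE are defined as shown; it is illustrative, not the Space's verbatim code.

# Sketch under assumptions: MODEL_ID and the elided from_pretrained kwargs
# (image_encoder=..., image_processor=..., torch_dtype=...) are guesses,
# not lines taken from the Space's app.py.
import torch
from transformers import CLIPVisionModel, CLIPProcessor
from diffusers import WanImageToVideoPipeline, AutoencoderKLWan

MODEL_ID = "Wan-AI/Wan2.1-FLF2V-14B-720P-diffusers"  # assumed checkpoint id
DTYPE = torch.float16

# components loaded exactly as in the diff
vision = CLIPVisionModel.from_pretrained(
    MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32
)
processor = CLIPProcessor.from_pretrained(
    MODEL_ID, subfolder="image_processor", use_fast=True
)
vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=DTYPE)

# assumed completion of the truncated from_pretrained call
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    vae=vae,
    image_encoder=vision,       # assumed wiring for the elided lines
    image_processor=processor,  # assumed wiring for the elided lines
    torch_dtype=DTYPE,
)
pipe.enable_model_cpu_offload()  # keeps VRAM use manageable for the 14B model

A call then mirrors the generate() fragment in the diff: pipe(image=first_frame, last_image=last_frame, ..., generator=gen) returns a result whose frames[0] can be passed to export_to_video.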
|