Update app.py
app.py CHANGED
@@ -4,7 +4,11 @@ Gradio demo for Wan2.1 First-Last-Frame-to-Video (FLF2V)
 Author: <your-handle>
 """
 
-import os
+import os
+import tempfile
+import numpy as np
+import torch
+import gradio as gr
 from diffusers import WanImageToVideoPipeline, AutoencoderKLWan
 from diffusers.utils import export_to_video
 from transformers import CLIPVisionModel
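The hunks that follow refer to module-level constants (MODEL_ID, DTYPE, MAX_AREA, DEFAULT_FRAMES) that sit outside the changed region. A minimal sketch of plausible definitions; apart from DEFAULT_FRAMES = 81, which the next hunk header shows, the values here are assumptions rather than part of this commit:

# Sketch only: values other than DEFAULT_FRAMES are assumptions, not from the diff.
MODEL_ID = "Wan-AI/Wan2.1-FLF2V-14B-720P-diffusers"  # assumed checkpoint id
DTYPE = torch.bfloat16                               # assumed reduced precision for the transformer/VAE
MAX_AREA = 720 * 1280                                # assumed pixel budget used by aspect_resize
DEFAULT_FRAMES = 81                                  # ≈ 5 s at 16 fps (shown in the hunk header below)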
@@ -21,9 +25,11 @@ DEFAULT_FRAMES = 81  # ≈ 5 s at 16 fps
 
 def load_pipeline():
     """Lazy-load the huge model once per process."""
+    # image encoder in full precision
     image_encoder = CLIPVisionModel.from_pretrained(
         MODEL_ID, subfolder="image_encoder", torch_dtype=torch.float32
     )
+    # VAE in reduced precision
     vae = AutoencoderKLWan.from_pretrained(
         MODEL_ID, subfolder="vae", torch_dtype=DTYPE
     )
@@ -35,9 +41,11 @@ def load_pipeline():
     )
 
     # memory helpers for ≤ 24 GB cards / HF T4-medium
-    pipe.enable_model_cpu_offload()
-    pipe.
-    # Optional
+    pipe.enable_model_cpu_offload()  # paged UNet blocks
+    pipe.vae.enable_slicing()        # reduce VAE peak RAM
+    # Optional: if you have xformers installed
+    # pipe.enable_xformers_memory_efficient_attention()
+
     return pipe.to("cuda" if torch.cuda.is_available() else "cpu")
 
 PIPE = load_pipeline()
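The pipeline assembly itself falls between the two hunks above (old lines 30-34) and is not shown. A minimal sketch of that elided middle, following the diffusers Wan image-to-video example; the exact kwargs are an assumption about this app's code, not part of the diff:

    # Sketch of the construction step the diff elides; kwargs follow the
    # diffusers Wan image-to-video example and are assumed, not confirmed here.
    pipe = WanImageToVideoPipeline.from_pretrained(
        MODEL_ID,
        vae=vae,
        image_encoder=image_encoder,
        torch_dtype=DTYPE,
    )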
@@ -54,9 +62,10 @@ def aspect_resize(img: Image.Image, max_area=MAX_AREA):
 
 def center_crop_resize(img: Image.Image, h, w):
     ratio = max(w / img.width, h / img.height)
-    img = img.resize(
-
-
+    img = img.resize(
+        (round(img.width * ratio), round(img.height * ratio)), Image.LANCZOS
+    )
+    return TF.center_crop(img, [h, w])
 
 # ----------------------------------------------------------------------
 # GENERATE --------------------------------------------------------------
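The generate() hunk below calls aspect_resize(first_frame) and expects back (image, h, w), but only the helper's signature appears in the hunk header; its body is outside the changed region. A minimal sketch, following the resizing recipe from the diffusers Wan FLF2V example; the PIPE attribute lookups are assumptions about this app's pipeline object. Note also that the new return TF.center_crop(img, [h, w]) line presumes torchvision.transforms.functional is imported as TF elsewhere in the file.

# Sketch only: aspect_resize is not part of this diff.
def aspect_resize(img, max_area=MAX_AREA):
    # pick the largest size under max_area that keeps the aspect ratio and
    # snaps to the model's spatial stride (assumed attribute names)
    aspect = img.height / img.width
    mod = PIPE.vae_scale_factor_spatial * PIPE.transformer.config.patch_size[1]
    h = round(np.sqrt(max_area * aspect)) // mod * mod
    w = round(np.sqrt(max_area / aspect)) // mod * mod
    return img.resize((w, h)), h, w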
@@ -67,11 +76,13 @@ def generate(first_frame, last_frame, prompt, negative_prompt, steps,
     seed = torch.seed()
     generator = torch.Generator(device=PIPE.device).manual_seed(seed)
 
+    # preprocess
     first_frame, h, w = aspect_resize(first_frame)
     if last_frame.size != first_frame.size:
         last_frame = center_crop_resize(last_frame, h, w)
 
-
+    # run pipeline
+    result = PIPE(
         image=first_frame,
         last_image=last_frame,
         prompt=prompt,
@@ -82,9 +93,11 @@ def generate(first_frame, last_frame, prompt, negative_prompt, steps,
         num_inference_steps=steps,
         guidance_scale=guidance,
         generator=generator,
-    )
+    )
+    frames = result.frames[0]  # list of PIL images
 
-
+    # export
+    video_path = export_to_video(frames, fps=fps)
     return video_path, seed
 
 # ----------------------------------------------------------------------
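The Gradio UI itself is outside the diffed region, even though gradio is imported above. A minimal sketch of how generate() might be wired into an interface; the component choices and the parameters after steps (the hunk header truncates the signature) are assumptions, not the app's confirmed layout:

# Sketch only: illustrative wiring, not part of this commit.
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Image(type="pil", label="First frame"),
        gr.Image(type="pil", label="Last frame"),
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="Negative prompt"),
        gr.Slider(10, 60, value=30, step=1, label="Steps"),
        gr.Slider(1.0, 10.0, value=5.0, label="Guidance scale"),   # assumed param
        gr.Slider(8, 30, value=16, step=1, label="FPS"),           # assumed param
    ],
    outputs=[gr.Video(label="Generated video"), gr.Number(label="Seed")],
)

if __name__ == "__main__":
    demo.launch()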