# SkyReels / app.py
import spaces
import gradio as gr
import argparse
import sys
import time
import os
import random
from skyreelsinfer.offload import OffloadConfig
from skyreelsinfer import TaskType
from skyreelsinfer.skyreels_video_infer import SkyReelsVideoSingleGpuInfer
from diffusers.utils import export_to_video
from diffusers.utils import load_image
from PIL import Image
import torch
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False
torch.backends.cudnn.allow_tf32 = True
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = False
# preferred_blas_library / preferred_linalg_library are functions, not attributes;
# assigning a string to them has no effect, so call them instead.
torch.backends.cuda.preferred_blas_library(backend="cublas")
torch.backends.cuda.preferred_linalg_library(backend="cusolver")
torch.set_float32_matmul_precision("high")
os.putenv("HF_HUB_ENABLE_HF_TRANSFER","1")
os.environ["SAFETENSORS_FAST_GPU"] = "1"
os.putenv("TOKENIZERS_PARALLELISM","False")
def init_predictor():
    """Build the single-GPU SkyReels image-to-video predictor used by generate_video."""
    global predictor
    predictor = SkyReelsVideoSingleGpuInfer(
        task_type=TaskType.I2V,
        model_id="Skywork/SkyReels-V1-Hunyuan-I2V",
        quant_model=False,
        is_offload=False,
        offload_config=OffloadConfig(
            high_cpu_memory=True,
            parameters_level=True,
            compiler_transformer=False,
        ),
    )
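# Low-VRAM variant (a sketch, not called by default): the same constructor exposes
# quant_model and is_offload, so a smaller GPU could trade speed for memory roughly
# like this. The function name and the exact flag values are assumptions.
def init_predictor_low_vram():
    global predictor
    predictor = SkyReelsVideoSingleGpuInfer(
        task_type=TaskType.I2V,
        model_id="Skywork/SkyReels-V1-Hunyuan-I2V",
        quant_model=True,   # assumed: use the quantized model path
        is_offload=True,    # assumed: keep idle submodules on the CPU between steps
        offload_config=OffloadConfig(
            high_cpu_memory=True,
            parameters_level=True,
            compiler_transformer=False,
        ),
    )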
@spaces.GPU(duration=120)
def generate_video(prompt, image, size, steps, frames, guidance_scale, progress=gr.Progress(track_tqdm=True)):
    print(f"image: {type(image)}")
    # Draw a fresh seed per request so repeated runs produce different videos.
    random.seed(time.time())
    seed = int(random.randrange(4294967294))
    kwargs = {
        "prompt": prompt,
        "height": size,
        "width": size,
        "num_frames": frames,
        "num_inference_steps": steps,
        "seed": seed,
        "guidance_scale": guidance_scale,
        "embedded_guidance_scale": 1.0,
        "negative_prompt": "Aerial view, aerial view, overexposed, low quality, deformation, a poor composition, bad hands, bad teeth, bad eyes, bad limbs, distortion",
        "cfg_for": False,
    }
    assert image is not None, "Please upload an input image."
    # Image.resize returns a new image rather than resizing in place,
    # so the result has to be reassigned.
    img = load_image(image=image)
    img = img.resize((size, size), Image.LANCZOS)
    kwargs["image"] = img
    output = predictor.inference(kwargs)
    video_out_file = f"{seed}.mp4"
    print(f"generated video, local path: {video_out_file}")
    export_to_video(output, video_out_file, fps=24)
    return video_out_file
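# Example (a sketch, assuming init_predictor() has run and a local "example.png"
# exists): the arguments mirror the order wired up in the Gradio click handler below.
#
#     video_path = generate_video(
#         prompt="a cinematic shot of a fox running through snow",
#         image="example.png",
#         size=368, steps=20, frames=48, guidance_scale=6.0,
#     )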
def create_gradio_interface():
    with gr.Blocks() as demo:
        with gr.Row():
            image = gr.Image(label="Upload Image", type="filepath")
            prompt = gr.Textbox(label="Input Prompt")
            size = gr.Slider(
                label="Size",
                minimum=256,
                maximum=1024,
                step=16,
                value=368,
            )
            frames = gr.Slider(
                label="Number of Frames",
                minimum=16,
                maximum=256,
                step=12,
                value=48,
            )
            steps = gr.Slider(
                label="Number of Steps",
                minimum=1,
                maximum=96,
                step=1,
                value=20,
            )
            guidance_scale = gr.Slider(
                label="Guidance Scale",
                minimum=1.0,
                maximum=16.0,
                step=0.1,
                value=6.0,
            )
        submit_button = gr.Button("Generate Video")
        output_video = gr.Video(label="Generated Video")
        submit_button.click(
            fn=generate_video,
            inputs=[prompt, image, size, steps, frames, guidance_scale],
            outputs=[output_video],
        )
    return demo
if __name__ == "__main__":
    init_predictor()
    demo = create_gradio_interface()
    demo.launch()
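# On Hugging Face Spaces, the request queue is often enabled before launching a
# GPU-backed demo (a sketch; the plain launch above also works, and max_size is
# an assumed value):
#
#     demo.queue(max_size=20).launch()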