import os
import random
from pathlib import Path
import numpy as np
import torch
from diffusers import AutoencoderKL, DDIMScheduler
from PIL import Image
from src.models.unet_2d_condition import UNet2DConditionModel
from src.models.unet_3d_emo import EMOUNet3DConditionModel
from src.models.whisper.audio2feature import load_audio_model
from src.pipelines.pipeline_echomimicv2 import EchoMimicV2Pipeline
from src.utils.util import save_videos_grid
from src.models.pose_encoder import PoseEncoder
from src.utils.dwpose_util import draw_pose_select_v2
from moviepy.editor import VideoFileClip, AudioFileClip
import gradio as gr
from datetime import datetime
from torchao.quantization import quantize_, int8_weight_only
import gc
from src.inference import inference_pipeline
from src.utils import load_config
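# The src.* modules above come from the EchoMimicV2 repository, so this script
# is assumed to run from the repo root (where the src/ package and ./configs
# directory live); several of the remaining imports are not referenced directly
# in this app.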
# Report the runtime environment; query GPU properties only when CUDA is present,
# so CPU-only machines don't crash on get_device_properties(0)
if torch.cuda.is_available():
    device = "cuda"
    total_vram_in_gb = torch.cuda.get_device_properties(0).total_memory / 1073741824
    print(f'\033[32mCUDA version: {torch.version.cuda}\033[0m')
    print(f'\033[32mPyTorch version: {torch.__version__}\033[0m')
    print(f'\033[32mGPU model: {torch.cuda.get_device_name()}\033[0m')
    print(f'\033[32mVRAM size: {total_vram_in_gb:.2f}GB\033[0m')
else:
    print("CUDA not available, using CPU")
    device = "cpu"
print(f'\033[32mPrecision: float16\033[0m')
dtype = torch.float16
ffmpeg_path = os.getenv('FFMPEG_PATH')
if ffmpeg_path is None:
    print("Please download ffmpeg-static and export it as FFMPEG_PATH.\nFor example: export FFMPEG_PATH=./ffmpeg-4.4-amd64-static")
elif ffmpeg_path not in os.getenv('PATH', ''):
    print("Adding ffmpeg to PATH")
    os.environ["PATH"] = f"{ffmpeg_path}:{os.environ['PATH']}"
# Initialize the model
def initialize_model():
    config = load_config('./configs/prompts/infer.yaml')
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return config, device
# Create the inference function
def generate_animation(audio_file, reference_image, config, device):
    try:
        # Both inputs use type="filepath", so they arrive as path strings
        output_video = inference_pipeline(
            audio_path=audio_file,
            reference_image_path=reference_image,
            config=config,
            device=device
        )
        return output_video
    except Exception as e:
        # Surface the failure in the UI instead of returning a bogus video path
        raise gr.Error(str(e))
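# Headless usage sketch (hypothetical file paths; not executed by the Gradio app):
#   cfg, dev = initialize_model()
#   video_path = generate_animation("assets/sample.wav", "assets/reference.png", cfg, dev)
#   print(video_path)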
# Initialize the model
config, device = initialize_model()
# Create the Gradio interface
with gr.Blocks(title="EchoMimicV2: Audio-Driven Human Animation") as demo:
    gr.Markdown("""
    # EchoMimicV2: Audio-Driven Human Animation
    Upload a reference image and audio file to generate an animated video.
    """)
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                label="Upload Audio",
                type="filepath"
            )
            image_input = gr.Image(
                label="Upload Reference Image",
                type="filepath"
            )
            generate_btn = gr.Button("Generate Animation")
        with gr.Column():
            output_video = gr.Video(label="Generated Animation")
    generate_btn.click(
        fn=generate_animation,
        inputs=[audio_input, image_input, gr.State(config), gr.State(device)],
        outputs=output_video
    )
# Launch the app
demo.launch()
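# When hosted remotely (e.g. on a Space), enabling Gradio's request queue helps
# with long-running generations; a possible variant (sketch):
#   demo.queue().launch()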