#!/usr/bin/env python3
"""
Cosmos-Predict2 for Hugging Face Spaces ZeroGPU
"""
import subprocess
import os

# Install flash-attn for better performance.
# Pass the parent environment through so pip still sees PATH, HOME, etc.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)
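# Setting FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE is the usual ZeroGPU workaround:
# it tells flash-attn's setup to skip compiling CUDA kernels at install time,
# since no GPU is attached while the Space image is being built.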
import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, SiglipProcessor
import random
import gc
import warnings

# Try to import the Cosmos-specific pipeline, fall back to the generic one if unavailable
try:
    from diffusers import Cosmos2TextToImagePipeline
    COSMOS_PIPELINE_AVAILABLE = True
    print("✅ Cosmos2TextToImagePipeline available")
except ImportError:
    from diffusers import DiffusionPipeline
    COSMOS_PIPELINE_AVAILABLE = False
    print("⚠️ Cosmos2TextToImagePipeline not available, using DiffusionPipeline with trust_remote_code")

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# Default the safeguard model to flash_attention_2 for better performance
def patch_from_pretrained(cls):
    orig_method = cls.from_pretrained
    def new_from_pretrained(*args, **kwargs):
        kwargs.setdefault("attn_implementation", "flash_attention_2")
        kwargs.setdefault("torch_dtype", torch.bfloat16)
        return orig_method(*args, **kwargs)
    cls.from_pretrained = new_from_pretrained

patch_from_pretrained(AutoModelForCausalLM)

# Default `use_fast=True` for the safeguard image processor
def patch_processor_fast(cls):
    orig_method = cls.from_pretrained
    def new_from_pretrained(*args, **kwargs):
        kwargs.setdefault("use_fast", True)
        return orig_method(*args, **kwargs)
    cls.from_pretrained = new_from_pretrained

patch_processor_fast(SiglipProcessor)
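# With both patches applied, the pipeline's internal loading calls effectively
# resolve to something like the following (illustration only, never executed
# here; "<safeguard-model-id>" is a hypothetical placeholder):
#
#   AutoModelForCausalLM.from_pretrained(
#       "<safeguard-model-id>",
#       attn_implementation="flash_attention_2",
#       torch_dtype=torch.bfloat16,
#   )
#   SiglipProcessor.from_pretrained("<safeguard-model-id>", use_fast=True)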
print("π Loading Cosmos-Predict2 model...") | |
# Handle authentication for gated model | |
try: | |
from huggingface_hub import login | |
import os | |
# Try to login with token from environment variable | |
hf_token = os.getenv("HF_TOKEN") | |
if hf_token: | |
login(token=hf_token) | |
print("β Authenticated with Hugging Face") | |
else: | |
print("β οΈ No HF_TOKEN found, trying without authentication...") | |
except Exception as e: | |
print(f"β οΈ Authentication failed: {e}") | |
# Load the model at startup | |
model_id = "nvidia/Cosmos-Predict2-2B-Text2Image" | |
try: | |
if COSMOS_PIPELINE_AVAILABLE: | |
print("π Loading with Cosmos2TextToImagePipeline...") | |
try: | |
# Try loading with safety checker first | |
pipe = Cosmos2TextToImagePipeline.from_pretrained( | |
model_id, | |
torch_dtype=torch.bfloat16, | |
use_auth_token=True # Use authentication token | |
) | |
except ImportError as e: | |
if "cosmos_guardrail" in str(e): | |
print("β οΈ cosmos_guardrail not available, trying without safety checker...") | |
# Try loading without safety checker | |
pipe = Cosmos2TextToImagePipeline.from_pretrained( | |
model_id, | |
torch_dtype=torch.bfloat16, | |
use_auth_token=True, | |
safety_checker=None, | |
requires_safety_checker=False | |
) | |
else: | |
raise e | |
else: | |
print("π Loading with DiffusionPipeline (trust_remote_code=True)...") | |
pipe = DiffusionPipeline.from_pretrained( | |
model_id, | |
torch_dtype=torch.bfloat16, | |
trust_remote_code=True, | |
use_auth_token=True # Use authentication token | |
) | |
pipe.to("cuda") | |
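    # Note: on ZeroGPU, importing `spaces` patches torch's CUDA initialization,
    # so calling .to("cuda") at startup is fine even though a physical GPU is
    # only attached while a @spaces.GPU-decorated function runs.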
print("β Cosmos-Predict2 model loaded successfully!") | |
except Exception as e: | |
print(f"β Failed to load Cosmos model: {e}") | |
print("π This is likely due to the model being gated/restricted or missing dependencies") | |
print("π Please check the Setup Guide for authentication instructions") | |
# For demo purposes, we could fall back to a different model | |
# But for now, let's just exit gracefully | |
raise e | |
# Default negative prompt for better quality | |
DEFAULT_NEGATIVE_PROMPT = "The video captures a series of frames showing ugly scenes, static with no motion, motion blur, over-saturation, shaky footage, low resolution, grainy texture, pixelated images, poorly lit areas, underexposed and overexposed scenes, poor color balance, washed out colors, choppy sequences, jerky movements, low frame rate, artifacting, color banding, unnatural transitions, outdated special effects, fake elements, unconvincing visuals, poorly edited content, jump cuts, visual noise, and flickering. Overall, the video is of poor quality." | |
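# With classifier-free guidance the sampler is steered away from the negative
# prompt at every denoising step, so the long "defect list" above acts as a
# generic quality filter. (Its wording mentions video because it appears to be
# inherited from the video-oriented Cosmos-Predict2 models.)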
def get_memory_info():
    """Get current memory usage"""
    if torch.cuda.is_available():
        vram_used = torch.cuda.memory_allocated(0) / 1024**3
        return f"GPU Memory Used: {vram_used:.1f}GB (H200 - 70GB Available)"
    else:
        return "GPU: Not allocated (ZeroGPU will assign when needed)"

@spaces.GPU(duration=120)  # allow up to 2 minutes for generation
def generate_image(prompt, negative_prompt="", num_steps=25, guidance_scale=7.5,
                   seed=-1, width=1024, height=1024, randomize_seed=True,
                   progress=gr.Progress(track_tqdm=True)):
    """Generate an image on the ZeroGPU H200"""
    try:
        # Handle the seed
        if randomize_seed or seed == -1:
            actual_seed = random.randint(0, 1000000)
        else:
            actual_seed = seed
        generator = torch.Generator().manual_seed(actual_seed)

        # Use the default negative prompt if none is provided
        if not negative_prompt.strip():
            negative_prompt = DEFAULT_NEGATIVE_PROMPT

        # With 70GB VRAM we can use much larger resolutions!
        max_pixels = 2048 * 2048  # 4MP max for reasonable generation times
        current_pixels = width * height
        if current_pixels > max_pixels:
            # Scale down proportionally
            scale = (max_pixels / current_pixels) ** 0.5
            width = int(width * scale)
            height = int(height * scale)
            # Round down to a multiple of 64 for model compatibility
            width = (width // 64) * 64
            height = (height // 64) * 64
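            # Worked example: a 2048x3072 request (~6.3MP) yields
            # scale = (4194304 / 6291456) ** 0.5 ≈ 0.816, i.e. 1672x2508,
            # which rounds down to 1664x2496 (both multiples of 64).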
size_msg = f"π Scaled to {width}x{height} for optimal performance" | |
else: | |
size_msg = f"π Generating at {width}x{height}" | |
print(f"π¨ Generating: {width}x{height}, {num_steps} steps, guidance: {guidance_scale}, seed: {actual_seed}") | |
# Generate with the powerful H200! | |
with torch.inference_mode(): | |
result = pipe( | |
prompt=prompt, | |
negative_prompt=negative_prompt, | |
num_inference_steps=num_steps, | |
guidance_scale=guidance_scale, | |
height=height, | |
width=width, | |
generator=generator | |
) | |
# Extract image | |
if hasattr(result, 'images'): | |
image = result.images[0] | |
elif isinstance(result, list): | |
image = result[0] | |
else: | |
image = result | |
# Cleanup | |
del result | |
torch.cuda.empty_cache() | |
return image, f"β Generated successfully! {size_msg} (Seed: {actual_seed})", get_memory_info(), actual_seed | |
except Exception as e: | |
torch.cuda.empty_cache() | |
return None, f"β Generation failed: {str(e)}", get_memory_info(), seed | |
# Create Gradio interface | |
def create_interface(): | |
with gr.Blocks(title="Cosmos-Predict2 ZeroGPU", theme=gr.themes.Soft()) as interface: | |
gr.Markdown(""" | |
# π Cosmos-Predict2 on ZeroGPU | |
**High-resolution generation β’ Fast inference** | |
This Space uses ZeroGPU for efficient GPU allocation. The model is pre-loaded and ready to generate! | |
""") | |
# Memory status | |
memory_display = gr.Textbox( | |
label="π GPU Status", | |
value=get_memory_info(), | |
interactive=False | |
) | |
with gr.Row(): | |
with gr.Column(): | |
# Generation settings | |
gr.Markdown("### π¨ Generate High-Quality Images") | |
prompt = gr.Textbox( | |
label="Prompt", | |
placeholder="A futuristic robot in a high-tech laboratory with holographic displays...", | |
lines=4, | |
value="A close-up shot captures a vibrant yellow scrubber vigorously working on a grimy plate, its bristles moving in circular motions to lift stubborn grease and food residue. The dish, once covered in remnants of a hearty meal, gradually reveals its original glossy surface." | |
) | |
negative_prompt = gr.Textbox( | |
label="Negative Prompt (Optional - has smart default)", | |
placeholder="Leave empty to use optimized default negative prompt...", | |
lines=2 | |
) | |
with gr.Row(): | |
steps = gr.Slider(10, 50, value=25, step=5, label="Inference Steps") | |
guidance = gr.Slider(1, 15, value=7.5, step=0.5, label="Guidance Scale") | |
with gr.Row(): | |
width = gr.Slider(512, 2048, value=1024, step=64, label="Width") | |
height = gr.Slider(512, 2048, value=1024, step=64, label="Height") | |
with gr.Row(): | |
randomize_seed = gr.Checkbox(label="Randomize Seed", value=True) | |
seed = gr.Number(label="Seed", value=42, precision=0) | |
generate_btn = gr.Button("π¨ Generate Image", variant="primary", size="lg") | |
with gr.Column(): | |
# Output | |
output_image = gr.Image(label="Generated Image", height=600) | |
generation_status = gr.Textbox(label="Generation Status", interactive=False) | |
seed_output = gr.Number(label="Used Seed", interactive=False) | |
# ZeroGPU info | |
gr.Markdown(""" | |
### π‘ ZeroGPU Features: | |
- **70GB VRAM**: Generate high-resolution images up to 2048x2048 | |
- **Pre-loaded Model**: No waiting for model loading | |
- **H200 powered**: Latest NVIDIA architecture for fast inference | |
- **Smart defaults**: Optimized negative prompt included | |
- **Flash Attention**: Enhanced performance optimizations | |
""") | |
# Event handlers | |
generate_btn.click( | |
generate_image, | |
inputs=[prompt, negative_prompt, steps, guidance, seed, width, height, randomize_seed], | |
outputs=[output_image, generation_status, memory_display, seed_output] | |
) | |
# Auto-refresh memory status | |
def refresh_memory(): | |
return get_memory_info() | |
# Update memory display every 10 seconds | |
gr.Timer(value=10).tick(refresh_memory, outputs=[memory_display]) | |
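        # gr.Timer fires a `tick` event every `value` seconds while the page is
        # open, so the VRAM readout stays current without user interaction.
        # (Assumes a Gradio version recent enough to ship gr.Timer.)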
        # Examples optimized for high resolution
        gr.Examples(
            examples=[
                ["A detailed cyberpunk cityscape at night with neon signs, flying cars, and holographic advertisements, highly detailed, 8k resolution"],
                ["A majestic dragon soaring through storm clouds with lightning, fantasy art, dramatic lighting, ultra detailed"],
                ["A futuristic space station orbiting Earth, with solar panels and docking bays, sci-fi concept art, cinematic"],
                ["A serene Japanese garden with cherry blossoms, koi pond, and traditional architecture, peaceful atmosphere, masterpiece"],
                ["A steampunk mechanical owl with brass gears and copper pipes, intricate details, vintage engineering"],
                ["A well-worn broom sweeps across a dusty wooden floor, its bristles gathering crumbs and flecks of debris in swift, rhythmic strokes"],
                ["A robotic arm tightens a bolt beneath the hood of a car, its tool head rotating with practiced torque, precision engineering"],
                ["A nighttime city bus terminal gradually shifts from stillness to subtle movement, urban night scene with illuminated signage"]
            ],
            inputs=[prompt],
            label="🎨 Example Prompts (optimized for high-resolution generation)"
        )

        # Usage tips
        gr.Markdown("""
        ### 📖 Usage Tips:
        1. **Ready to go**: The model is pre-loaded, just click generate!
        2. **High-res**: Try resolutions up to 2048x2048 with the powerful H200 GPU
        3. **Quality**: Use 25-30 steps for high quality, 15-20 for faster generation
        4. **Prompts**: Be descriptive and specific for best results
        5. **Negative prompts**: Leave empty to use the optimized default, or customize as needed
        6. **Seeds**: Use randomize for variety, or set a specific seed for reproducible results
        """)

    return interface

if __name__ == "__main__":
    print("🚀 Starting Cosmos-Predict2 ZeroGPU Space...")
    interface = create_interface()
    interface.launch()
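# Hypothetical requirements.txt for this Space (package names only; exact pins
# are not part of the source):
#
#   torch
#   diffusers        # needs a release that includes Cosmos2TextToImagePipeline
#   transformers
#   gradio
#   spaces
#   huggingface_hub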