linoyts HF Staff committed on
Commit
e1cceaf
·
verified ·
1 Parent(s): c2e2423

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -36
app.py CHANGED
@@ -47,11 +47,11 @@ FIXED_FPS = 16
47
  MIN_FRAMES_MODEL = 8
48
  MAX_FRAMES_MODEL = 81
49
 
50
- # Default prompts for different modes
51
  MODE_PROMPTS = {
52
- "Ref2V": "the playful penguin picks up the green cat eye sunglasses and puts them on",
53
- "FLF2V": "CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective.",
54
- "Random2V": "Various different characters appear and disappear in a fast transition video showcasting their unique features and personalities. The video is about showcasing different dance styles, with each character performing a distinct dance move. The background is a vibrant, colorful stage with dynamic lighting that changes with each dance style. The camera captures close-ups of the dancers' expressions and movements. Highly dynamic, fast-paced music video, with quick cuts and transitions between characters, cinematic, vibrant colors"
55
  }
56
 
57
  default_negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"
@@ -335,7 +335,7 @@ def get_duration(gallery_images, mode, prompt, height, width,
335
  base_duration = 75
336
 
337
  # Add extra time for background removal processing
338
- if mode == "Ref2V" and remove_bg:
339
  base_duration += 30
340
 
341
  return base_duration
@@ -351,7 +351,7 @@ def generate_video(gallery_images, mode, prompt, height, width,
351
 
352
  Args:
353
  gallery_images (list): List of PIL images from the gallery
354
- mode (str): Processing mode - "Ref2V", "FLF2V", or "Random2V"
355
  prompt (str): Text prompt describing the desired animation
356
  height (int): Target height for the output video
357
  width (int): Target width for the output video
@@ -361,7 +361,7 @@ def generate_video(gallery_images, mode, prompt, height, width,
361
  steps (int): Number of inference steps
362
  seed (int): Random seed for reproducible results
363
  randomize_seed (bool): Whether to use a random seed
364
- remove_bg (bool): Whether to remove background from images (Ref2V mode only)
365
  progress (gr.Progress): Gradio progress tracker
366
 
367
  Returns:
@@ -370,13 +370,13 @@ def generate_video(gallery_images, mode, prompt, height, width,
370
  if gallery_images is None or len(gallery_images) == 0:
371
  raise gr.Error("Please upload at least one image to the gallery.")
372
  else:
373
- # Process images: remove background if requested (Ref2V mode only), then remove alpha channels
374
  processed_images = []
375
  for img in gallery_images:
376
  image = img[0] # Extract PIL image from gallery format
377
 
378
- # Apply background removal only for Ref2V mode if checkbox is checked
379
- if mode == "Ref2V" and remove_bg:
380
  image = remove_background_from_image(image)
381
 
382
  # Always remove alpha channels to ensure RGB format
@@ -385,10 +385,10 @@ def generate_video(gallery_images, mode, prompt, height, width,
385
 
386
  gallery_images = processed_images
387
 
388
- if mode == "FLF2V" and len(gallery_images) >= 2:
389
  gallery_images = gallery_images[:2]
390
- elif mode == "FLF2V" and len(gallery_images) < 2:
391
- raise gr.Error("FLF2V mode requires at least 2 images, but only {} were supplied.".format(len(gallery_images)))
392
 
393
  target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
394
  target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
@@ -398,7 +398,7 @@ def generate_video(gallery_images, mode, prompt, height, width,
398
  current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
399
 
400
  # Process images based on the selected mode
401
- if mode == "FLF2V":
402
  frames, mask = prepare_video_and_mask_FLF2V(
403
  first_img=gallery_images[0],
404
  last_img=gallery_images[1],
@@ -407,10 +407,10 @@ def generate_video(gallery_images, mode, prompt, height, width,
407
  num_frames=num_frames
408
  )
409
  reference_images = None
410
- elif mode == "Ref2V":
411
  frames, mask = prepare_video_and_mask_Ref2V(height=target_h, width=target_w, num_frames=num_frames)
412
  reference_images = gallery_images
413
- else: # mode == "Random2V"
414
  # Calculate dynamic frame indices based on number of images and frames
415
  frame_indices = calculate_random2v_frame_indices(len(gallery_images), num_frames)
416
 
@@ -444,13 +444,13 @@ def generate_video(gallery_images, mode, prompt, height, width,
444
  return video_path, current_seed
445
 
446
  control_modes = """
447
- **3 control modes avilable:**
448
 
449
- **Ref2V (Reference-to-Video)** Generate a video incorporating elements from input reference images
450
 
451
- **FLF2V (First-Last Frame-to-Video)** Generate a video using first and last frame conditioning defined by input images
452
 
453
- **Random2V (Random-to-Video)** Generate a video with intermediate transitions between multiple input images
454
  """
455
 
456
  with gr.Blocks() as demo:
@@ -473,15 +473,22 @@ with gr.Blocks() as demo:
473
  allow_preview=True
474
  )
475
 
476
- # Radio button for mode selection
477
  mode_radio = gr.Radio(
478
- choices=["Ref2V", "FLF2V", "Random2V"],
479
- value="Ref2V",
480
  label="Control Mode",
481
- info="Ref2V: Reference to Video | FLF2V: First-Last Frame to Video | Random2V: Random to Video"
482
  )
483
 
484
- prompt_input = gr.Textbox(label="Prompt", value=MODE_PROMPTS["Ref2V"])
 
 
 
 
 
 
 
485
  duration_seconds_input = gr.Slider(
486
  minimum=round(MIN_FRAMES_MODEL/FIXED_FPS,1),
487
  maximum=round(MAX_FRAMES_MODEL/FIXED_FPS,1),
@@ -494,13 +501,6 @@ with gr.Blocks() as demo:
494
  with gr.Accordion("Advanced Settings", open=False):
495
  negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
496
 
497
- # Background removal checkbox (only for Ref2V mode)
498
- remove_bg_checkbox = gr.Checkbox(
499
- label="Remove Background (Ref2V mode only)",
500
- value=False,
501
- info="Automatically remove background from input images when using Ref2V mode"
502
- )
503
-
504
  seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
505
  randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
506
  with gr.Row():
@@ -516,7 +516,7 @@ with gr.Blocks() as demo:
516
 
517
  # Function to update checkbox visibility based on mode
518
  def update_bg_removal_visibility(mode):
519
- return gr.update(visible=(mode == "Ref2V"))
520
 
521
  # Update prompt when mode changes
522
  mode_radio.change(
@@ -545,12 +545,11 @@ with gr.Blocks() as demo:
545
  guidance_scale_input, steps_slider, seed_input, randomize_seed_checkbox, remove_bg_checkbox
546
  ]
547
 
548
- generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
549
  generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
550
  gr.Examples(
551
  examples=[
552
- [["reachy.png", "sunglasses.jpg", "gpu_hat.png"], "Ref2V", "the cute robot is wearing the sunglasses and the hat that reads 'GPU poor', and moves around playfully", 480, 832],
553
- [["flf2v_input_first_frame.png", "flf2v_input_last_frame.png"], "FLF2V", "CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective.", 512, 512],
554
  ],
555
  inputs=[gallery_component, mode_radio, prompt_input, height_input, width_input], outputs=[video_output, seed_input], fn=generate_video, cache_examples="lazy"
556
  )
 
47
  MIN_FRAMES_MODEL = 8
48
  MAX_FRAMES_MODEL = 81
49
 
50
+ # Default prompts for different modes - Updated with new mode names
51
  MODE_PROMPTS = {
52
+ "reference": "the playful penguin picks up the green cat eye sunglasses and puts them on",
53
+ "first - last frame": "CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective.",
54
+ "random transitions": "Various different characters appear and disappear in a fast transition video showcasting their unique features and personalities. The video is about showcasing different dance styles, with each character performing a distinct dance move. The background is a vibrant, colorful stage with dynamic lighting that changes with each dance style. The camera captures close-ups of the dancers' expressions and movements. Highly dynamic, fast-paced music video, with quick cuts and transitions between characters, cinematic, vibrant colors"
55
  }
56
 
57
  default_negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"
 
335
  base_duration = 75
336
 
337
  # Add extra time for background removal processing
338
+ if mode == "reference" and remove_bg: # Updated to use new mode name
339
  base_duration += 30
340
 
341
  return base_duration
 
351
 
352
  Args:
353
  gallery_images (list): List of PIL images from the gallery
354
+ mode (str): Processing mode - "reference", "first - last frame", or "random transitions"
355
  prompt (str): Text prompt describing the desired animation
356
  height (int): Target height for the output video
357
  width (int): Target width for the output video
 
361
  steps (int): Number of inference steps
362
  seed (int): Random seed for reproducible results
363
  randomize_seed (bool): Whether to use a random seed
364
+ remove_bg (bool): Whether to remove background from images (reference mode only)
365
  progress (gr.Progress): Gradio progress tracker
366
 
367
  Returns:
 
370
  if gallery_images is None or len(gallery_images) == 0:
371
  raise gr.Error("Please upload at least one image to the gallery.")
372
  else:
373
+ # Process images: remove background if requested (reference mode only), then remove alpha channels
374
  processed_images = []
375
  for img in gallery_images:
376
  image = img[0] # Extract PIL image from gallery format
377
 
378
+ # Apply background removal only for reference mode if checkbox is checked
379
+ if mode == "reference" and remove_bg: # Updated to use new mode name
380
  image = remove_background_from_image(image)
381
 
382
  # Always remove alpha channels to ensure RGB format
 
385
 
386
  gallery_images = processed_images
387
 
388
+ if mode == "first - last frame" and len(gallery_images) >= 2: # Updated mode name
389
  gallery_images = gallery_images[:2]
390
+ elif mode == "first - last frame" and len(gallery_images) < 2: # Updated mode name
391
+ raise gr.Error("First - Last Frame mode requires at least 2 images, but only {} were supplied.".format(len(gallery_images)))
392
 
393
  target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
394
  target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
 
398
  current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
399
 
400
  # Process images based on the selected mode
401
+ if mode == "first - last frame": # Updated mode name
402
  frames, mask = prepare_video_and_mask_FLF2V(
403
  first_img=gallery_images[0],
404
  last_img=gallery_images[1],
 
407
  num_frames=num_frames
408
  )
409
  reference_images = None
410
+ elif mode == "reference": # Updated mode name
411
  frames, mask = prepare_video_and_mask_Ref2V(height=target_h, width=target_w, num_frames=num_frames)
412
  reference_images = gallery_images
413
+ else: # mode == "random transitions" # Updated mode name
414
  # Calculate dynamic frame indices based on number of images and frames
415
  frame_indices = calculate_random2v_frame_indices(len(gallery_images), num_frames)
416
 
 
444
  return video_path, current_seed
445
 
446
  control_modes = """
447
+ **3 control modes available:**
448
 
449
+ **Reference** Generate a video incorporating elements from input reference images
450
 
451
+ **First - Last Frame** Generate a video using first and last frame conditioning defined by input images
452
 
453
+ **Random Transitions** Generate a video with intermediate transitions between multiple input images
454
  """
455
 
456
  with gr.Blocks() as demo:
 
473
  allow_preview=True
474
  )
475
 
476
+ # Radio button for mode selection with updated names
477
  mode_radio = gr.Radio(
478
+ choices=["reference", "first - last frame", "random transitions"],
479
+ value="reference",
480
  label="Control Mode",
481
+ info="Reference: Reference to Video | First - Last Frame: First-Last Frame to Video | Random Transitions: Random to Video"
482
  )
483
 
484
+ # Background removal checkbox moved here - right beneath control modes
485
+ remove_bg_checkbox = gr.Checkbox(
486
+ label="Remove Background (Reference mode only)",
487
+ value=False,
488
+ info="Automatically remove background from input images when using Reference mode"
489
+ )
490
+
491
+ prompt_input = gr.Textbox(label="Prompt", value=MODE_PROMPTS["reference"])
492
  duration_seconds_input = gr.Slider(
493
  minimum=round(MIN_FRAMES_MODEL/FIXED_FPS,1),
494
  maximum=round(MAX_FRAMES_MODEL/FIXED_FPS,1),
 
501
  with gr.Accordion("Advanced Settings", open=False):
502
  negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
503
 
 
 
 
 
 
 
 
504
  seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
505
  randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
506
  with gr.Row():
 
516
 
517
  # Function to update checkbox visibility based on mode
518
  def update_bg_removal_visibility(mode):
519
+ return gr.update(visible=(mode == "reference")) # Updated to use new mode name
520
 
521
  # Update prompt when mode changes
522
  mode_radio.change(
 
545
  guidance_scale_input, steps_slider, seed_input, randomize_seed_checkbox, remove_bg_checkbox
546
  ]
547
 
 
548
  generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
549
  gr.Examples(
550
  examples=[
551
+ [["reachy.png", "sunglasses.jpg", "gpu_hat.png"], "reference", "the cute robot is wearing the sunglasses and the hat that reads 'GPU poor', and moves around playfully", 480, 832],
552
+ [["flf2v_input_first_frame.png", "flf2v_input_last_frame.png"], "first - last frame", "CG animation style, a small blue bird takes off from the ground, flapping its wings. The bird's feathers are delicate, with a unique pattern on its chest. The background shows a blue sky with white clouds under bright sunshine. The camera follows the bird upward, capturing its flight and the vastness of the sky from a close-up, low-angle perspective.", 512, 512],
553
  ],
554
  inputs=[gallery_component, mode_radio, prompt_input, height_input, width_input], outputs=[video_output, seed_input], fn=generate_video, cache_examples="lazy"
555
  )