akhaliq (HF Staff) committed
Commit ef8c514 · 1 Parent(s): ebd4771

add text to video and fix input image issue for image to video

Files changed (1):
  app.py (+240 -21)
app.py CHANGED
@@ -1315,7 +1315,7 @@ def generate_video_from_image(input_image_data, prompt: str, session_id: Optiona
         )
         print(f"[Image2Video] InferenceClient initialized (provider=auto)")
 
-        # Normalize input image to bytes
+        # Normalize input image to bytes, with downscale/compress to cap request size
         import io
         from PIL import Image
         try:
@@ -1323,19 +1323,18 @@ def generate_video_from_image(input_image_data, prompt: str, session_id: Optiona
         except Exception:
             np = None
 
-        print(f"[Image2Video] Normalizing input image type={type(input_image_data)}")
-        if hasattr(input_image_data, 'read'):
-            raw = input_image_data.read()
-            pil_image = Image.open(io.BytesIO(raw))
-        elif hasattr(input_image_data, 'mode') and hasattr(input_image_data, 'size'):
-            pil_image = input_image_data
-        elif np is not None and isinstance(input_image_data, np.ndarray):
-            pil_image = Image.fromarray(input_image_data)
-        elif isinstance(input_image_data, (bytes, bytearray)):
-            pil_image = Image.open(io.BytesIO(input_image_data))
-        else:
-            pil_image = Image.open(io.BytesIO(bytes(input_image_data)))
-
+        def _load_pil(img_like) -> Image.Image:
+            if hasattr(img_like, 'read'):
+                return Image.open(io.BytesIO(img_like.read()))
+            if hasattr(img_like, 'mode') and hasattr(img_like, 'size'):
+                return img_like
+            if np is not None and isinstance(img_like, np.ndarray):
+                return Image.fromarray(img_like)
+            if isinstance(img_like, (bytes, bytearray)):
+                return Image.open(io.BytesIO(img_like))
+            return Image.open(io.BytesIO(bytes(img_like)))
+
+        pil_image = _load_pil(input_image_data)
         if pil_image.mode != 'RGB':
             pil_image = pil_image.convert('RGB')
         try:
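A quick standalone spot-check of the normalization paths above. This is a sketch only: in app.py `_load_pil` is a local helper inside `generate_video_from_image`, so it is re-declared here; assumes Pillow and numpy are installed.

import io
import numpy as np
from PIL import Image

def _load_pil(img_like) -> Image.Image:
    # Same branch order as the hunk above: file-like, PIL image, ndarray, bytes, fallback
    if hasattr(img_like, 'read'):
        return Image.open(io.BytesIO(img_like.read()))
    if hasattr(img_like, 'mode') and hasattr(img_like, 'size'):
        return img_like
    if np is not None and isinstance(img_like, np.ndarray):
        return Image.fromarray(img_like)
    if isinstance(img_like, (bytes, bytearray)):
        return Image.open(io.BytesIO(img_like))
    return Image.open(io.BytesIO(bytes(img_like)))

src = Image.new("RGB", (8, 8), "red")
buf = io.BytesIO()
src.save(buf, format="PNG")
# PIL image, raw bytes, numpy array, and file-like input all normalize the same way
for candidate in (src, buf.getvalue(), np.asarray(src), io.BytesIO(buf.getvalue())):
    assert _load_pil(candidate).size == (8, 8)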
@@ -1343,9 +1342,35 @@ def generate_video_from_image(input_image_data, prompt: str, session_id: Optiona
         except Exception:
             pass
 
-        buf = io.BytesIO()
-        pil_image.save(buf, format='PNG')
-        input_bytes = buf.getvalue()
+        # Progressive encode to keep payload under ~3.9MB (below 4MB limit)
+        MAX_BYTES = 3_900_000
+        max_dim = 1024  # initial cap on longest edge
+        quality = 90
+
+        def encode_current(pil: Image.Image, q: int) -> bytes:
+            tmp = io.BytesIO()
+            pil.save(tmp, format='JPEG', quality=q, optimize=True)
+            return tmp.getvalue()
+
+        # Downscale while the longest edge exceeds max_dim
+        while max(pil_image.size) > max_dim:
+            ratio = max_dim / float(max(pil_image.size))
+            new_size = (max(1, int(pil_image.size[0] * ratio)), max(1, int(pil_image.size[1] * ratio)))
+            pil_image = pil_image.resize(new_size, Image.Resampling.LANCZOS)
+
+        encoded = encode_current(pil_image, quality)
+        # If still too big, iteratively reduce quality, then dimensions
+        while len(encoded) > MAX_BYTES and (quality > 40 or max(pil_image.size) > 640):
+            if quality > 40:
+                quality -= 10
+            else:
+                # reduce dims by 15% if already at low quality
+                new_w = max(1, int(pil_image.size[0] * 0.85))
+                new_h = max(1, int(pil_image.size[1] * 0.85))
+                pil_image = pil_image.resize((new_w, new_h), Image.Resampling.LANCZOS)
+            encoded = encode_current(pil_image, quality)
+
+        input_bytes = encoded
 
         # Call image-to-video; require method support
         model_id = "Lightricks/LTX-Video-0.9.8-13B-distilled"
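The encode loop above trades quality first and dimensions second to stay under the cap. A minimal self-contained check of the initial downscale stage, assuming Pillow >= 9.1 for `Image.Resampling`:

import io
from PIL import Image

img = Image.new("RGB", (4000, 3000), "navy")  # synthetic oversized input
max_dim = 1024
ratio = max_dim / float(max(img.size))
img = img.resize((int(img.size[0] * ratio), int(img.size[1] * ratio)), Image.Resampling.LANCZOS)
buf = io.BytesIO()
img.save(buf, format="JPEG", quality=90, optimize=True)
print(img.size, len(buf.getvalue()))  # (1024, 768) and well under 3_900_000 bytes
assert max(img.size) <= max_dim and len(buf.getvalue()) <= 3_900_000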
@@ -1402,7 +1427,7 @@ def generate_video_from_image(input_image_data, prompt: str, session_id: Optiona
 
         if file_url:
             video_html = (
-                f"<video controls style=\"max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;\">"
+                f"<video controls autoplay muted loop playsinline style=\"max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;\">"
                 f"<source src=\"{file_url}\" type=\"video/mp4\" />"
                 f"Your browser does not support the video tag."
                 f"</video>"
@@ -1419,6 +1444,86 @@ def generate_video_from_image(input_image_data, prompt: str, session_id: Optiona
         print(f"Image-to-video generation error: {str(e)}")
         return f"Error generating video (image-to-video): {str(e)}"
 
+def generate_video_from_text(prompt: str, session_id: Optional[str] = None) -> str:
+    """Generate a video from a text prompt using Hugging Face InferenceClient.
+
+    Returns an HTML <video> tag whose source points to a local file URL (file://...).
+    """
+    try:
+        print("[Text2Video] Starting video generation from text")
+        if not os.getenv('HF_TOKEN'):
+            print("[Text2Video] Missing HF_TOKEN")
+            return "Error: HF_TOKEN environment variable is not set. Please set it to your Hugging Face API token."
+
+        client = InferenceClient(
+            provider="auto",
+            api_key=os.getenv('HF_TOKEN'),
+            bill_to="huggingface",
+        )
+        print("[Text2Video] InferenceClient initialized (provider=auto)")
+
+        # Ensure the client has text_to_video (newer huggingface_hub)
+        text_to_video_method = getattr(client, "text_to_video", None)
+        if not callable(text_to_video_method):
+            print("[Text2Video] InferenceClient.text_to_video not available in this huggingface_hub version")
+            return (
+                "Error generating video (text-to-video): Your installed huggingface_hub version "
+                "does not expose InferenceClient.text_to_video. Please upgrade with "
+                "`pip install -U huggingface_hub` and try again."
+            )
+
+        model_id = "Wan-AI/Wan2.2-TI2V-5B"
+        prompt_str = (prompt or "").strip()
+        print(f"[Text2Video] Calling text_to_video with model={model_id}, prompt length={len(prompt_str)}")
+        video_bytes = text_to_video_method(
+            prompt_str,
+            model=model_id,
+        )
+        print(f"[Text2Video] Received video bytes: {len(video_bytes) if hasattr(video_bytes, '__len__') else 'unknown length'}")
+
+        # Persist to a temp .mp4 and return a file URL based <video>
+        try:
+            _ensure_video_dir_exists()
+            file_name = f"{uuid.uuid4()}.mp4"
+            file_path = os.path.join(VIDEO_TEMP_DIR, file_name)
+            with open(file_path, "wb") as f:
+                f.write(video_bytes)
+            _register_video_for_session(session_id, file_path)
+            try:
+                file_size = os.path.getsize(file_path)
+            except Exception:
+                file_size = -1
+            print(f"[Text2Video] Saved video to temp file: {file_path} (size={file_size} bytes)")
+        except Exception as save_exc:
+            print(f"[Text2Video] Warning: could not persist temp video file: {save_exc}")
+
+        # Build file:// URL
+        file_url = None
+        try:
+            if 'file_path' in locals() and file_path:
+                from pathlib import Path
+                file_url = Path(file_path).as_uri()
+        except Exception:
+            file_url = None
+
+        if not file_url:
+            return "Error generating video (text-to-video): Could not persist video to a local file."
+
+        video_html = (
+            f"<video controls autoplay muted loop playsinline style=\"max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;\">"
+            f"<source src=\"{file_url}\" type=\"video/mp4\" />"
+            f"Your browser does not support the video tag."
+            f"</video>"
+        )
+        print("[Text2Video] Successfully generated video HTML tag from text")
+        return video_html
+    except Exception as e:
+        import traceback
+        print("[Text2Video] Exception during generation:")
+        traceback.print_exc()
+        print(f"Text-to-video generation error: {str(e)}")
+        return f"Error generating video (text-to-video): {str(e)}"
+
 def extract_image_prompts_from_text(text: str, num_images_needed: int = 1) -> list:
     """Extract image generation prompts from the full text based on number of images needed"""
     # Use the entire text as the base prompt for image generation
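Stripped of logging and file handling, the new helper boils down to one `InferenceClient.text_to_video` call. A minimal sketch, assuming a recent `huggingface_hub` and an `HF_TOKEN` in the environment (the `bill_to` argument used in the diff is omitted here):

import os
from huggingface_hub import InferenceClient

client = InferenceClient(provider="auto", api_key=os.getenv("HF_TOKEN"))
video_bytes = client.text_to_video(
    "A young man walking on the street",
    model="Wan-AI/Wan2.2-TI2V-5B",
)
with open("sample.mp4", "wb") as f:  # raw MP4 bytes come back from the call
    f.write(video_bytes)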
@@ -1638,6 +1743,79 @@ def create_image_replacement_blocks_text_to_image_single(html_content: str, prom
     # If no <body>, just append
     return f"{SEARCH_START}\n\n{DIVIDER}\n{image_html}\n{REPLACE_END}"
 
+def create_video_replacement_blocks_text_to_video(html_content: str, prompt: str, session_id: Optional[str] = None) -> str:
+    """Create search/replace blocks that generate and insert ONLY ONE text-to-video result.
+
+    Replaces the first detected <img> placeholder; if none found, inserts one video near the top of <body>.
+    """
+    if not prompt or not prompt.strip():
+        return ""
+
+    import re
+
+    # Detect the same placeholders as image counterparts, to replace the first image slot with a video
+    placeholder_patterns = [
+        r'<img[^>]*src=["\'](?:placeholder|dummy|sample|example)[^"\']*["\'][^>]*>',
+        r'<img[^>]*src=["\']https?://via\.placeholder\.com[^"\']*["\'][^>]*>',
+        r'<img[^>]*src=["\']https?://picsum\.photos[^"\']*["\'][^>]*>',
+        r'<img[^>]*src=["\']https?://dummyimage\.com[^"\']*["\'][^>]*>',
+        r'<img[^>]*alt=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
+        r'<img[^>]*class=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
+        r'<img[^>]*id=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
+        r'<img[^>]*src=["\']data:image[^"\']*["\'][^>]*>',
+        r'<img[^>]*src=["\']#["\'][^>]*>',
+        r'<img[^>]*src=["\']about:blank["\'][^>]*>',
+    ]
+
+    placeholder_images = []
+    for pattern in placeholder_patterns:
+        matches = re.findall(pattern, html_content, re.IGNORECASE)
+        if matches:
+            placeholder_images.extend(matches)
+
+    if not placeholder_images:
+        img_pattern = r'<img[^>]*>'
+        placeholder_images = re.findall(img_pattern, html_content)
+
+    video_html = generate_video_from_text(prompt, session_id=session_id)
+    if video_html.startswith("Error"):
+        return ""
+
+    # Replace first placeholder if present
+    if placeholder_images:
+        placeholder = placeholder_images[0]
+        placeholder_clean = re.sub(r'\s+', ' ', placeholder.strip())
+        placeholder_variations = [
+            placeholder,
+            placeholder_clean,
+            placeholder_clean.replace('"', "'"),
+            placeholder_clean.replace("'", '"'),
+            re.sub(r'\s+', ' ', placeholder_clean),
+            placeholder_clean.replace(' ', ' '),
+        ]
+        blocks = []
+        for variation in placeholder_variations:
+            blocks.append(f"""{SEARCH_START}
+{variation}
+{DIVIDER}
+{video_html}
+{REPLACE_END}""")
+        return '\n\n'.join(blocks)
+
+    # Otherwise insert after <body>
+    if '<body' in html_content:
+        body_end = html_content.find('>', html_content.find('<body')) + 1
+        insertion_point = html_content[:body_end] + '\n '
+        return f"""{SEARCH_START}
+{insertion_point}
+{DIVIDER}
+{insertion_point}
+{video_html}
+{REPLACE_END}"""
+
+    # If no <body>, just append
+    return f"{SEARCH_START}\n\n{DIVIDER}\n{video_html}\n{REPLACE_END}"
+
 def create_image_replacement_blocks_from_input_image(html_content: str, user_prompt: str, input_image_data, max_images: int = 1) -> str:
     """Create search/replace blocks using image-to-image generation with a provided input image.
 
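For reference, the blocks emitted by `create_video_replacement_blocks_text_to_video` above are consumed by app.py's `apply_search_replace_changes`. A toy applier for a single block, assuming the usual conflict-marker strings (the real `SEARCH_START`/`DIVIDER`/`REPLACE_END` constants are defined elsewhere in app.py and may differ):

SEARCH_START, DIVIDER, REPLACE_END = "<<<<<<< SEARCH", "=======", ">>>>>>> REPLACE"

def apply_one_block(html: str, block: str) -> str:
    head, tail = block.split(DIVIDER, 1)
    search = head.replace(SEARCH_START, "", 1).strip("\n")
    replace = tail.replace(REPLACE_END, "", 1).strip("\n")
    # An empty SEARCH side mirrors the no-<body> fallback above: append the content
    return html.replace(search, replace, 1) if search.strip() else html + "\n" + replace

html = '<html><body><img src="https://picsum.photos/640"></body></html>'
block = (f"{SEARCH_START}\n<img src=\"https://picsum.photos/640\">\n"
         f"{DIVIDER}\n<video controls></video>\n{REPLACE_END}")
print(apply_one_block(html, block))
# <html><body><video controls></video></body></html>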
@@ -1810,7 +1988,7 @@ def create_video_replacement_blocks_from_input_image(html_content: str, user_pro
     print("[Image2Video] No <body> tag; appending video via replacement block")
     return f"{SEARCH_START}\n\n{DIVIDER}\n{video_html}\n{REPLACE_END}"
 
-def apply_generated_images_to_html(html_content: str, user_prompt: str, enable_text_to_image: bool, enable_image_to_image: bool, input_image_data, image_to_image_prompt: str | None = None, text_to_image_prompt: str | None = None, enable_image_to_video: bool = False, image_to_video_prompt: str | None = None, session_id: Optional[str] = None) -> str:
+def apply_generated_images_to_html(html_content: str, user_prompt: str, enable_text_to_image: bool, enable_image_to_image: bool, input_image_data, image_to_image_prompt: str | None = None, text_to_image_prompt: str | None = None, enable_image_to_video: bool = False, image_to_video_prompt: str | None = None, session_id: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: str | None = None) -> str:
     """Apply text-to-image and/or image-to-image replacements to HTML content.
 
     If both toggles are enabled, text-to-image replacements run first, then image-to-image.
@@ -1845,6 +2023,18 @@ def apply_generated_images_to_html(html_content: str, user_prompt: str, enable_t
             print("[MediaApply] No i2v replacement blocks generated")
         return result
 
+    # If text-to-video is enabled, insert a generated video (no input image required) and return.
+    if enable_text_to_video and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
+        t2v_prompt = (text_to_video_prompt or user_prompt or "").strip()
+        print(f"[MediaApply] Running text-to-video with prompt len={len(t2v_prompt)}")
+        blocks_tv = create_video_replacement_blocks_text_to_video(result, t2v_prompt, session_id=session_id)
+        if blocks_tv:
+            print("[MediaApply] Applying text-to-video replacement blocks")
+            result = apply_search_replace_changes(result, blocks_tv)
+        else:
+            print("[MediaApply] No t2v replacement blocks generated")
+        return result
+
     # If an input image is provided and image-to-image is enabled, we only replace one image
     # and skip text-to-image to satisfy the requirement to replace exactly the number of uploaded images.
     if enable_image_to_image and input_image_data is not None and (result.strip().startswith('<!DOCTYPE html>') or result.strip().startswith('<html')):
@@ -2693,7 +2883,7 @@ The HTML code above contains the complete original website structure with all im
 stop_generation = False
 
 
-def generation_code(query: Optional[str], vlm_image: Optional[gr.Image], gen_image: Optional[gr.Image], file: Optional[str], website_url: Optional[str], _setting: Dict[str, str], _history: Optional[History], _current_model: Dict, enable_search: bool = False, language: str = "html", provider: str = "auto", enable_image_generation: bool = False, enable_image_to_image: bool = False, image_to_image_prompt: Optional[str] = None, text_to_image_prompt: Optional[str] = None, enable_image_to_video: bool = False, image_to_video_prompt: Optional[str] = None):
+def generation_code(query: Optional[str], vlm_image: Optional[gr.Image], gen_image: Optional[gr.Image], file: Optional[str], website_url: Optional[str], _setting: Dict[str, str], _history: Optional[History], _current_model: Dict, enable_search: bool = False, language: str = "html", provider: str = "auto", enable_image_generation: bool = False, enable_image_to_image: bool = False, image_to_image_prompt: Optional[str] = None, text_to_image_prompt: Optional[str] = None, enable_image_to_video: bool = False, image_to_video_prompt: Optional[str] = None, enable_text_to_video: bool = False, text_to_video_prompt: Optional[str] = None):
     if query is None:
         query = ''
     if _history is None:
@@ -2845,6 +3035,8 @@ This will help me create a better design for you."""
                 enable_image_to_video=enable_image_to_video,
                 image_to_video_prompt=image_to_video_prompt,
                 session_id=session_id,
+                enable_text_to_video=enable_text_to_video,
+                text_to_video_prompt=text_to_video_prompt,
             )
 
             _history.append([query, final_content])
@@ -3010,6 +3202,8 @@ This will help me create a better design for you."""
                 enable_image_to_video=enable_image_to_video,
                 image_to_video_prompt=image_to_video_prompt,
                 session_id=session_id,
+                enable_text_to_video=enable_text_to_video,
+                text_to_video_prompt=text_to_video_prompt,
             )
 
             yield {
@@ -3032,6 +3226,8 @@ This will help me create a better design for you."""
                 enable_image_to_video=enable_image_to_video,
                 image_to_video_prompt=image_to_video_prompt,
                 session_id=session_id,
+                enable_text_to_video=enable_text_to_video,
+                text_to_video_prompt=text_to_video_prompt,
            )
 
             preview_val = None
@@ -3432,6 +3628,8 @@ This will help me create a better design for you."""
                 image_to_video_prompt=image_to_video_prompt,
                 session_id=session_id,
                 text_to_image_prompt=text_to_image_prompt,
+                enable_text_to_video=enable_text_to_video,
+                text_to_video_prompt=text_to_video_prompt,
             )
 
             # Update history with the cleaned content
@@ -3459,6 +3657,8 @@ This will help me create a better design for you."""
                 enable_image_to_video=enable_image_to_video,
                 image_to_video_prompt=image_to_video_prompt,
                 session_id=session_id,
+                enable_text_to_video=enable_text_to_video,
+                text_to_video_prompt=text_to_video_prompt,
             )
 
             _history.append([query, final_content])
@@ -4580,6 +4780,20 @@ with gr.Blocks(
                     visible=False
                 )
 
+                # Text-to-Video
+                text_to_video_toggle = gr.Checkbox(
+                    label="📹 Generate Video (text → video)",
+                    value=False,
+                    visible=True,
+                    info="Generate a short video directly from your prompt using Wan-AI/Wan2.2-TI2V-5B"
+                )
+                text_to_video_prompt = gr.Textbox(
+                    label="Text-to-Video Prompt",
+                    placeholder="Describe the video to generate (e.g., 'A young man walking on the street')",
+                    lines=2,
+                    visible=False
+                )
+
                 def on_image_to_image_toggle(toggled):
                     # Show generation image input and its prompt when image-to-image is enabled
                     return gr.update(visible=bool(toggled)), gr.update(visible=bool(toggled))
@@ -4605,6 +4819,11 @@ with gr.Blocks(
                     inputs=[image_generation_toggle],
                     outputs=[text_to_image_prompt]
                 )
+                text_to_video_toggle.change(
+                    on_text_to_image_toggle,
+                    inputs=[text_to_video_toggle],
+                    outputs=[text_to_video_prompt]
+                )
                 model_dropdown = gr.Dropdown(
                     choices=[model['name'] for model in AVAILABLE_MODELS],
                     value=DEFAULT_MODEL_NAME,
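The wiring above reuses `on_text_to_image_toggle` for the new checkbox, since both handlers just map checkbox state to prompt visibility. A self-contained sketch of that pattern (assumes gradio is installed; widget labels mirror the diff):

import gradio as gr

with gr.Blocks() as demo:
    toggle = gr.Checkbox(label="📹 Generate Video (text → video)", value=False)
    prompt = gr.Textbox(label="Text-to-Video Prompt", visible=False)
    # Checking the box reveals the prompt textbox; unchecking hides it again
    toggle.change(lambda t: gr.update(visible=bool(t)), inputs=[toggle], outputs=[prompt])

demo.launch()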
@@ -4855,7 +5074,7 @@ with gr.Blocks(
                 show_progress="hidden",
             ).then(
                 generation_code,
-                inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt],
+                inputs=[input, image_input, generation_image_input, file_input, website_url_input, setting, history, current_model, search_toggle, language_dropdown, provider_state, image_generation_toggle, image_to_image_toggle, image_to_image_prompt, text_to_image_prompt, image_to_video_toggle, image_to_video_prompt, text_to_video_toggle, text_to_video_prompt],
                 outputs=[code_output, history, sandbox, history_output]
             ).then(
                 end_generation_ui,
 