# Source: Hugging Face Space file "app.py" by adil9858 (commit d9a8c7f, 3.97 kB)
import base64
import os
import warnings
from io import BytesIO

warnings.filterwarnings('ignore')

import requests
import streamlit as st
from PIL import Image
def encode_image(img):
    """Serialize *img* (any object with a PIL-style ``save``) to a base64 PNG string.

    The image is written to an in-memory buffer as PNG and the resulting
    bytes are base64-encoded and returned as ASCII text.
    """
    buf = BytesIO()
    img.save(buf, format="PNG")
    return base64.b64encode(buf.getvalue()).decode("utf-8")
# Function to get explanation from VLM API
def explain_image_with_vlm(image, prompt):
    """Ask the Hyperbolic vision-language model to describe *image*.

    Parameters
    ----------
    image : PIL.Image.Image
        Image to analyze; it is base64-encoded as PNG by ``encode_image``
        and embedded in the request as a data URI.
    prompt : str
        Free-form user instruction sent alongside the image.

    Returns
    -------
    str
        The model's reply text, or an ``"Error: <status> - <body>"`` string
        when the HTTP request does not return 200.
    """
    api = "https://api.hyperbolic.xyz/v1/chat/completions"
    # SECURITY(review): this bearer token was hard-coded in a public file and
    # must be rotated. It is kept only as a fallback so current deployments
    # keep working; prefer setting HYPERBOLIC_API_KEY in the environment.
    api_key = os.environ.get(
        "HYPERBOLIC_API_KEY",
        "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJhZGlsYXppejIwMTNAZ21haWwuY29tIiwiaWF0IjoxNzMyODU1NDI1fQ.lRjbz9LMW9jj7Lf7I8m_dTRh4KQ1wDCdWiTRGErMuEk",
    )
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    base64_img = encode_image(image)
    payload = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},  # user-provided prompt
                    {
                        "type": "image_url",
                        # BUG FIX: encode_image() produces PNG bytes, but the
                        # data URI previously claimed image/jpeg.
                        "image_url": {"url": f"data:image/png;base64,{base64_img}"},
                    },
                ],
            }
        ],
        "model": "Qwen/Qwen2-VL-72B-Instruct",
        "max_tokens": 2048,
        "temperature": 0.7,
        "top_p": 0.9,
    }

    # BUG FIX: a request with no timeout can hang the Streamlit worker
    # indefinitely if the API stalls.
    response = requests.post(api, headers=headers, json=payload, timeout=60)
    if response.status_code == 200:
        return (
            response.json()
            .get("choices", [{}])[0]
            .get("message", {})
            .get("content", "No explanation found.")
        )
    return f"Error: {response.status_code} - {response.text}"
# Streamlit UI
st.set_page_config(page_title="๐Ÿ”ฎ AI Vision: Image Insights", layout="wide")
# Header section with futuristic visuals
st.markdown(
"""
<style>
.main-header {
text-align: center;
font-size: 2.5rem;
color: #00FFFF;
font-family: 'Courier New', monospace;
text-shadow: 0px 0px 8px #00FFFF;
margin-bottom: 20px;
}
.sub-header {
text-align: center;
font-size: 1.5rem;
color: #FFD700;
font-family: 'Courier New', monospace;
margin-bottom: 40px;
}
</style>
<div class="main-header">๐Ÿ”ฎ ImageX</div>
<div class="sub-header">Image Data Uncovered</div>
""",
unsafe_allow_html=True
)
# Sidebar for additional futuristic customization
st.sidebar.title("๐Ÿ”ง Settings")
image_format = st.sidebar.radio("Select Image Format:", ["PNG", "JPEG"], index=0)
explanation_length = st.sidebar.slider("Explanation Length (words):", min_value=5, max_value=20, value=10)
def styled_header(header_text):
    """Wrap *header_text* in a green (#7FFF00) HTML <h3> for st.markdown."""
    opening = "<h3 style='color:#7FFF00;'>"
    return opening + header_text + "</h3>"
# Main Camera Input Section
img_file_buffer = st.camera_input("๐Ÿš€ Capture Your Image Here")

# Text prompt input.
# BUG FIX: the hint string was passed as `value=`, so on first use the
# literal text "Enter your Prompt:" was submitted to the model as the actual
# prompt. `placeholder=` shows the hint while leaving the real value empty,
# and the `if ... and user_prompt` guard below then waits for real input.
user_prompt = st.text_input(
    "๐Ÿ“ Enter your prompt (e.g., 'Explain the image', 'What are the functions in this graph?', 'Describe the scene'):",
    placeholder="Enter your Prompt:",
)

if img_file_buffer and user_prompt:
    # Decode the captured frame into a PIL image for the API helper.
    image = Image.open(img_file_buffer)

    st.markdown(styled_header("๐Ÿค– Image Analysis:"), unsafe_allow_html=True)
    with st.spinner("๐Ÿ” The AI is analyzing your image. Please wait..."):
        explanation = explain_image_with_vlm(image, user_prompt)
    st.success("โœจ Analysis Complete!")
    st.write(f"**AI Insight:** {explanation}")

# Footer
st.markdown(
    """
    <footer style="text-align: center; margin-top: 50px;">
        <hr style="border: 1px solid #00FFFF;">
        <p style="font-family: 'Courier New', monospace; color: #AAAAAA;">Developed by: <b>DataScienceProF</b> | <i>Empowering the Future</i></p>
    </footer>
    """,
    unsafe_allow_html=True
)