Nymbo committed
Commit 4264b3e · verified · 1 parent: 385a33a

Update app.py

Files changed (1)
  1. app.py +436 -141
app.py CHANGED
@@ -2,12 +2,56 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import os
 import json

 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")

 def respond(
     message,
     history: list[tuple[str, str]],
     system_message,
     max_tokens,
@@ -16,18 +60,19 @@ def respond(
     frequency_penalty,
     seed,
     provider,
-    custom_api_key, # New parameter for BYOK
     custom_model,
     model_search_term,
     selected_model
 ):
     print(f"Received message: {message}")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Selected provider: {provider}")
-    print(f"Custom API Key provided: {bool(custom_api_key.strip())}") # Log whether a custom key was provided without printing the key
     print(f"Selected model (custom_model): {custom_model}")
     print(f"Model search term: {model_search_term}")
     print(f"Selected model from radio: {selected_model}")
@@ -57,17 +102,50 @@ def respond(
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
         if user_part:
-            messages.append({"role": "user", "content": user_part})
-            print(f"Added user message to context: {user_part}")
         if assistant_part:
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")

-    # Append the latest user message
-    messages.append({"role": "user", "content": message})
-    print("Latest user message appended.")
-
     # Determine which model to use, prioritizing custom_model if provided
     model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
     print(f"Model selected for inference: {model_to_use}")
@@ -90,12 +168,11 @@ def respond(
     # Use the InferenceClient for making the request
     try:
         # Create a generator for the streaming response
-        # The provider is already set when initializing the client
         stream = client.chat_completion(
             model=model_to_use,
             messages=messages,
             stream=True,
-            **parameters # Pass all other parameters
         )

         # Print a starting message for token streaming
@@ -129,94 +206,39 @@ def validate_provider(api_key, provider):
         return gr.update(value="hf-inference")
     return gr.update(value=provider)

-# GRADIO UI
-
-chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", layout="panel")
-print("Chatbot interface created.")
-
-# Basic input components
-system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")

-max_tokens_slider = gr.Slider(
-    minimum=1,
-    maximum=4096,
-    value=512,
-    step=1,
-    label="Max tokens"
-)
-temperature_slider = gr.Slider(
-    minimum=0.1,
-    maximum=4.0,
-    value=0.7,
-    step=0.1,
-    label="Temperature"
-)
-top_p_slider = gr.Slider(
-    minimum=0.1,
-    maximum=1.0,
-    value=0.95,
-    step=0.05,
-    label="Top-P"
-)
-frequency_penalty_slider = gr.Slider(
-    minimum=-2.0,
-    maximum=2.0,
-    value=0.0,
-    step=0.1,
-    label="Frequency Penalty"
-)
-seed_slider = gr.Slider(
-    minimum=-1,
-    maximum=65535,
-    value=-1,
-    step=1,
-    label="Seed (-1 for random)"
-)

-# Provider selection
-providers_list = [
-    "hf-inference", # Default Hugging Face Inference
-    "cerebras", # Cerebras provider
-    "together", # Together AI
-    "sambanova", # SambaNova
-    "novita", # Novita AI
-    "cohere", # Cohere
-    "fireworks-ai", # Fireworks AI
-    "hyperbolic", # Hyperbolic
-    "nebius", # Nebius
 ]

-provider_radio = gr.Radio(
-    choices=providers_list,
-    value="hf-inference",
-    label="Inference Provider",
-    info="[View all models here](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending)"
-)
-
-# New BYOK textbox - Added for the new feature
-byok_textbox = gr.Textbox(
-    value="",
-    label="BYOK (Bring Your Own Key)",
-    info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
-    placeholder="Enter your Hugging Face API token",
-    type="password" # Hide the API key for security
-)
-
-# Custom model box
-custom_model_box = gr.Textbox(
-    value="",
-    label="Custom Model",
-    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
-    placeholder="meta-llama/Llama-3.3-70B-Instruct"
-)
-
-# Model selection components
-model_search_box = gr.Textbox(
-    label="Filter Models",
-    placeholder="Search for a featured model...",
-    lines=1
-)
-
 models_list = [
     "meta-llama/Llama-3.3-70B-Instruct",
     "meta-llama/Llama-3.1-70B-Instruct",
@@ -246,59 +268,337 @@ models_list = [
     "HuggingFaceTB/SmolLM2-360M-Instruct",
     "tiiuae/falcon-7b-instruct",
     "01-ai/Yi-1.5-34B-Chat",
-]
-
-featured_model_radio = gr.Radio(
-    label="Select a model below",
-    choices=models_list,
-    value="meta-llama/Llama-3.3-70B-Instruct",
-    interactive=True
-)
-
-def filter_models(search_term):
-    print(f"Filtering models with search term: {search_term}")
-    filtered = [m for m in models_list if search_term.lower() in m.lower()]
-    print(f"Filtered models: {filtered}")
-    return gr.update(choices=filtered)

-def set_custom_model_from_radio(selected):
     """
-    This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
-    We will update the Custom Model text box with that selection automatically.
     """
-    print(f"Featured model selected: {selected}")
-    return selected

-# Create the Gradio interface
-demo = gr.ChatInterface(
-    fn=respond,
-    additional_inputs=[
-        system_message_box,
-        max_tokens_slider,
-        temperature_slider,
-        top_p_slider,
-        frequency_penalty_slider,
-        seed_slider,
-        provider_radio, # Provider selection
-        byok_textbox, # New BYOK textbox
-        custom_model_box, # Custom Model
-        model_search_box, # Model search box
-        featured_model_radio # Featured model radio
-    ],
-    fill_height=True,
-    chatbot=chatbot,
-    theme="Nymbo/Nymbo_Theme",
 )
-print("ChatInterface object created.")

-with demo:
     # Connect the model filter to update the radio choices
     model_search_box.change(
         fn=filter_models,
         inputs=model_search_box,
         outputs=featured_model_radio
     )
-    print("Model search box change event linked.")

     # Connect the featured model radio to update the custom model box
     featured_model_radio.change(
@@ -306,7 +606,6 @@ with demo:
         inputs=featured_model_radio,
         outputs=custom_model_box
     )
-    print("Featured model radio button change event linked.")

     # Connect the BYOK textbox to validate provider selection
     byok_textbox.change(
@@ -314,7 +613,6 @@ with demo:
         inputs=[byok_textbox, provider_radio],
         outputs=provider_radio
     )
-    print("BYOK textbox change event linked.")

     # Also validate provider when the radio changes to ensure consistency
     provider_radio.change(
@@ -322,10 +620,7 @@ with demo:
         inputs=[byok_textbox, provider_radio],
         outputs=provider_radio
     )
-    print("Provider radio button change event linked.")
-
-print("Gradio interface initialized.")

 if __name__ == "__main__":
-    print("Launching the demo application.")
     demo.launch(show_api=True)
 
 from huggingface_hub import InferenceClient
 import os
 import json
+import base64
+from PIL import Image
+import io

 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")

+def encode_image_to_base64(image):
+    """Convert a PIL Image to a base64 string"""
+    buffered = io.BytesIO()
+    image.save(buffered, format="JPEG")
+    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+    return img_str
+
+def process_uploaded_images(images):
+    """Process uploaded images and return image_url dicts for API submission"""
+    if not images:
+        return []
+
+    image_contents = []
+    for img in images:
+        if isinstance(img, str): # Path to an image
+            try:
+                image = Image.open(img)
+                base64_image = encode_image_to_base64(image)
+                image_contents.append({
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{base64_image}"
+                    }
+                })
+            except Exception as e:
+                print(f"Error processing image {img}: {e}")
+        else: # Already a PIL Image
+            try:
+                base64_image = encode_image_to_base64(img)
+                image_contents.append({
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{base64_image}"
+                    }
+                })
+            except Exception as e:
+                print(f"Error processing uploaded image: {e}")
+
+    return image_contents
+
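A quick sanity check of the two helpers above (a minimal sketch, not part of the commit; it assumes Pillow is installed and the functions are in scope):

    from PIL import Image

    # Hypothetical smoke test: a tiny in-memory image round-trips to a data URI
    img = Image.new("RGB", (8, 8), "red")
    parts = process_uploaded_images([img])
    assert parts[0]["type"] == "image_url"
    assert parts[0]["image_url"]["url"].startswith("data:image/jpeg;base64,")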
 def respond(
     message,
+    images, # New parameter for uploaded images
     history: list[tuple[str, str]],
     system_message,
     max_tokens,
 
     frequency_penalty,
     seed,
     provider,
+    custom_api_key,
     custom_model,
     model_search_term,
     selected_model
 ):
     print(f"Received message: {message}")
+    print(f"Received images: {len(images) if images else 0} image(s)")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Selected provider: {provider}")
+    print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
     print(f"Selected model (custom_model): {custom_model}")
     print(f"Model search term: {model_search_term}")
     print(f"Selected model from radio: {selected_model}")
 
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
+
+        # Process user messages (could be multimodal)
         if user_part:
+            # Check if the user message is already multimodal (from history)
+            if isinstance(user_part, list):
+                # Already in multimodal format, use as is
+                messages.append({"role": "user", "content": user_part})
+                print("Added multimodal user message from history")
+            else:
+                # Simple text message
+                messages.append({"role": "user", "content": user_part})
+                print(f"Added user message to context: {user_part}")
+
+        # Process assistant messages (always text)
         if assistant_part:
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")

+    # Process the current message (could include images)
+    current_message_content = []
+
+    # Add text content if provided
+    if message and message.strip():
+        current_message_content.append({
+            "type": "text",
+            "text": message
+        })
+
+    # Process and add image content if provided
+    if images:
+        image_contents = process_uploaded_images(images)
+        current_message_content.extend(image_contents)
+
+    # Format the final message based on content
+    if current_message_content:
+        if len(current_message_content) == 1 and "type" in current_message_content[0] and current_message_content[0]["type"] == "text":
+            # If only text, use simple string format for compatibility with all models
+            messages.append({"role": "user", "content": current_message_content[0]["text"]})
+            print(f"Added simple text user message: {current_message_content[0]['text']}")
+        else:
+            # If multimodal content, use the array format
+            messages.append({"role": "user", "content": current_message_content})
+            print(f"Added multimodal user message with {len(current_message_content)} parts")
+
     # Determine which model to use, prioritizing custom_model if provided
     model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
     print(f"Model selected for inference: {model_to_use}")
 
     # Use the InferenceClient for making the request
     try:
         # Create a generator for the streaming response
         stream = client.chat_completion(
             model=model_to_use,
             messages=messages,
             stream=True,
+            **parameters
         )

         # Print a starting message for token streaming
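Note that parameters is assembled earlier in respond() and falls outside every hunk shown here, so its exact contents are not visible in this diff. A hedged sketch of the shape it plausibly has, inferred from the function's arguments:

    # Assumption, not shown in this diff: a kwargs dict built from the slider values
    parameters = {
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
    }
    if seed != -1:
        parameters["seed"] = seed  # -1 is the UI's "random" sentinel, so it is omitted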
 
         return gr.update(value="hf-inference")
     return gr.update(value=provider)

+# Function to update featured model list based on search
+def filter_models(search_term):
+    print(f"Filtering models with search term: {search_term}")
+    filtered = [m for m in models_list if search_term.lower() in m.lower()]
+    print(f"Filtered models: {filtered}")
+    return gr.update(choices=filtered)

+def set_custom_model_from_radio(selected):
+    """
+    This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
+    We will update the Custom Model text box with that selection automatically.
+    """
+    print(f"Featured model selected: {selected}")
+    return selected

+# Define multimodal models list
+multimodal_models_list = [
+    "meta-llama/Llama-3.3-70B-Vision-Instruct",
+    "meta-llama/Llama-3.1-8B-Vision-Instruct",
+    "Qwen/Qwen2.5-VL-7B-Chat",
+    "Qwen/Qwen2.5-VL-3B-Chat",
+    "microsoft/Phi-3-vision-instruct",
+    "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "deepseek-ai/DeepSeek-VL-7B-Chat",
+    "01-ai/Yi-VL-6B-Chat",
+    "01-ai/Yi-VL-34B-Chat",
+    "Cohere/command-vision-nightly",
+    "LLaVA/llava-1.6-34b-hf",
+    "fireworks-ai/FireworksBridge-Vision-Alpha",
+    "liuhaotian/llava-v1.6-vicuna-13b",
 ]

+# Add multimodal models to the full model list
 models_list = [
     "meta-llama/Llama-3.3-70B-Instruct",
     "meta-llama/Llama-3.1-70B-Instruct",

     "HuggingFaceTB/SmolLM2-360M-Instruct",
     "tiiuae/falcon-7b-instruct",
     "01-ai/Yi-1.5-34B-Chat",
+] + multimodal_models_list # Add multimodal models to the list

+# Create a custom ChatBot class that will display images
+def format_history_with_images(history):
     """
+    Format history for display in the chatbot, handling multimodal content
     """
+    formatted_history = []
+
+    for user_msg, assistant_msg in history:
+        # Process user message
+        if isinstance(user_msg, list):
+            # Multimodal message
+            formatted_user_msg = []
+            for item in user_msg:
+                if item.get("type") == "text":
+                    formatted_user_msg.append(item["text"])
+                elif item.get("type") == "image_url":
+                    # Extract the base64 image data
+                    img_url = item.get("image_url", {}).get("url", "")
+                    if img_url.startswith("data:image/"):
+                        formatted_user_msg.append((img_url, "image"))
+
+            formatted_history.append((formatted_user_msg, assistant_msg))
+        else:
+            # Regular text message
+            formatted_history.append((user_msg, assistant_msg))
+
+    return formatted_history
+
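To see what format_history_with_images produces, here is a minimal sketch (not part of the commit) of one multimodal turn mapped to the chatbot's display format; the data URI is truncated for illustration:

    turn = [
        {"type": "text", "text": "What is in this picture?"},
        {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}},
    ]
    out = format_history_with_images([(turn, "A red square.")])
    # out == [(["What is in this picture?",
    #           ("data:image/jpeg;base64,...", "image")], "A red square.")]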
+# GRADIO UI

+# Create a custom chatbot that can display images
+chatbot = gr.Chatbot(
+    height=600,
+    show_copy_button=True,
+    placeholder="Select a model and begin chatting",
+    layout="panel"
 )
+print("Chatbot interface created.")
+
+# Create a virtual column layout for the message input area
+with gr.Blocks() as msg_input:
+    with gr.Row():
+        with gr.Column(scale=4):
+            msg = gr.Textbox(
+                placeholder="Enter text here or upload an image",
+                show_label=False,
+                container=False,
+                lines=3
+            )
+        with gr.Column(scale=1, min_width=50):
+            img_upload = gr.Image(
+                type="pil",
+                label="Upload Image",
+                show_label=False,
+                icon="🖼️",
+                container=True,
+                height=50,
+                width=50
+            )
+
+# Basic input components
+system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")

+with gr.Accordion("Model Settings", open=False):
+    with gr.Row():
+        with gr.Column():
+            max_tokens_slider = gr.Slider(
+                minimum=1,
+                maximum=4096,
+                value=512,
+                step=1,
+                label="Max tokens"
+            )
+
+            temperature_slider = gr.Slider(
+                minimum=0.1,
+                maximum=4.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature"
+            )
+
+        with gr.Column():
+            top_p_slider = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.95,
+                step=0.05,
+                label="Top-P"
+            )
+
+            frequency_penalty_slider = gr.Slider(
+                minimum=-2.0,
+                maximum=2.0,
+                value=0.0,
+                step=0.1,
+                label="Frequency Penalty"
+            )
+
+    with gr.Row():
+        seed_slider = gr.Slider(
+            minimum=-1,
+            maximum=65535,
+            value=-1,
+            step=1,
+            label="Seed (-1 for random)"
+        )
+
+with gr.Accordion("Model Selection", open=False):
+    with gr.Row():
+        with gr.Column():
+            # Provider selection
+            providers_list = [
+                "hf-inference", # Default Hugging Face Inference
+                "cerebras", # Cerebras provider
+                "together", # Together AI
+                "sambanova", # SambaNova
+                "novita", # Novita AI
+                "cohere", # Cohere
+                "fireworks-ai", # Fireworks AI
+                "hyperbolic", # Hyperbolic
+                "nebius", # Nebius
+            ]
+
+            provider_radio = gr.Radio(
+                choices=providers_list,
+                value="hf-inference",
+                label="Inference Provider",
+                info="[View all models here](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending)"
+            )
+
+            # New BYOK textbox - Added for the new feature
+            byok_textbox = gr.Textbox(
+                value="",
+                label="BYOK (Bring Your Own Key)",
+                info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
+                placeholder="Enter your Hugging Face API token",
+                type="password" # Hide the API key for security
+            )
+
+        with gr.Column():
+            # Model selection components
+            model_search_box = gr.Textbox(
+                label="Filter Models",
+                placeholder="Search for a featured model...",
+                lines=1
+            )
+
+            featured_model_radio = gr.Radio(
+                label="Select a model below",
+                choices=models_list,
+                value="meta-llama/Llama-3.3-70B-Vision-Instruct", # Default to a multimodal model
+                interactive=True
+            )
+
+            # Custom model box
+            custom_model_box = gr.Textbox(
+                value="",
+                label="Custom Model",
+                info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
+                placeholder="meta-llama/Llama-3.3-70B-Vision-Instruct"
+            )
+
+    gr.Markdown("[See all multimodal models](https://huggingface.co/models?pipeline_tag=visual-question-answering&sort=trending)")
+
+# Main Gradio interface
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    gr.Markdown("# 🤖 Serverless-MultiModal-Hub")
+
+    with gr.Row():
+        with gr.Column(scale=3):
+            # Display the chatbot
+            chatbot_interface = chatbot
+
+            # Custom submit function to handle multimodal inputs
+            def submit_message(message, images, history):
+                history = history or []
+
+                # Format the message content based on whether there are images
+                if images:
+                    # Create a multimodal message format for history display
+                    user_msg = []
+                    if message:
+                        user_msg.append({"type": "text", "text": message})
+
+                    # Add each image as an image_url item
+                    for img in images:
+                        if img is not None:
+                            img_base64 = encode_image_to_base64(img)
+                            img_url = f"data:image/jpeg;base64,{img_base64}"
+                            user_msg.append({
+                                "type": "image_url",
+                                "image_url": {"url": img_url}
+                            })
+
+                    # Add to history
+                    history.append([user_msg, None])
+                else:
+                    # Text-only message
+                    if message:
+                        history.append([message, None])
+                    else:
+                        # No content to submit
+                        return history
+
+                return history
+
+            # Create message input
+            with gr.Group():
+                with gr.Row():
+                    with gr.Column(scale=4):
+                        text_input = gr.Textbox(
+                            placeholder="Enter text here",
+                            show_label=False,
+                            container=False,
+                            lines=3
+                        )
+                    with gr.Column(scale=1, min_width=50):
+                        image_input = gr.Image(
+                            type="pil",
+                            label="Upload Image",
+                            show_label=False,
+                            sources=["upload", "clipboard"],
+                            tool="editor",
+                            height=100,
+                            visible=True
+                        )
+
+            # Submit button
+            submit_btn = gr.Button("Submit", variant="primary")
+
+            # Clear button
+            clear_btn = gr.Button("Clear")
+
+        with gr.Column(scale=1):
+            # Put settings here
+            system_message_box = gr.Textbox(
+                value="",
+                placeholder="You are a helpful assistant that can understand images.",
+                label="System Prompt",
+                lines=2
+            )
+
+            with gr.Accordion("Model Selection", open=False):
+                # Provider selection
+                provider_radio = gr.Radio(
+                    choices=providers_list,
+                    value="hf-inference",
+                    label="Inference Provider"
+                )
+
+                # BYOK textbox
+                byok_textbox = gr.Textbox(
+                    value="",
+                    label="API Key",
+                    placeholder="Enter provider API key",
+                    type="password"
+                )
+
+                # Model selection components
+                model_search_box = gr.Textbox(
+                    label="Filter Models",
+                    placeholder="Search models...",
+                    lines=1
+                )
+
+                featured_model_radio = gr.Radio(
+                    label="Models",
+                    choices=models_list,
+                    value="meta-llama/Llama-3.3-70B-Vision-Instruct",
+                    interactive=True
+                )
+
+                custom_model_box = gr.Textbox(
+                    value="",
+                    label="Custom Model",
+                    placeholder="Enter model path"
+                )
+
+                gr.Markdown("[View all multimodal models](https://huggingface.co/models?pipeline_tag=visual-question-answering&sort=trending)")
+
+            with gr.Accordion("Model Settings", open=False):
+                max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max tokens")
+                temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+                top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
+                frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
+                seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
+
+    # Connect the submit button
+    submit_btn.click(
+        fn=submit_message,
+        inputs=[text_input, image_input, chatbot_interface],
+        outputs=[chatbot_interface],
+        queue=False
+    ).then(
+        fn=respond,
+        inputs=[
+            text_input,
+            image_input,
+            chatbot_interface,
+            system_message_box,
+            max_tokens_slider,
+            temperature_slider,
+            top_p_slider,
+            frequency_penalty_slider,
+            seed_slider,
+            provider_radio,
+            byok_textbox,
+            custom_model_box,
+            model_search_box,
+            featured_model_radio
+        ],
+        outputs=[chatbot_interface],
+        queue=True
+    ).then(
+        fn=lambda: (None, None), # Clear inputs after submission
+        inputs=None,
+        outputs=[text_input, image_input]
+    )
+
+    # Clear button functionality
+    clear_btn.click(lambda: None, None, chatbot_interface, queue=False)
+
     # Connect the model filter to update the radio choices
     model_search_box.change(
         fn=filter_models,
         inputs=model_search_box,
         outputs=featured_model_radio
     )

     # Connect the featured model radio to update the custom model box
     featured_model_radio.change(

         inputs=featured_model_radio,
         outputs=custom_model_box
     )

     # Connect the BYOK textbox to validate provider selection
     byok_textbox.change(

         inputs=[byok_textbox, provider_radio],
         outputs=provider_radio
     )

     # Also validate provider when the radio changes to ensure consistency
     provider_radio.change(

         inputs=[byok_textbox, provider_radio],
         outputs=provider_radio
     )

 if __name__ == "__main__":
+    print("Launching Serverless-MultiModal-Hub application.")
     demo.launch(show_api=True)
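For reference, a minimal end-to-end sketch of the streaming multimodal call that the updated respond() builds up to (not part of the commit; the model id is illustrative, the data URI is truncated, and HF_TOKEN must be set in the environment):

    import os
    from huggingface_hub import InferenceClient

    client = InferenceClient(token=os.getenv("HF_TOKEN"), provider="hf-inference")
    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}},
        ],
    }]
    # Illustrative vision-capable model id; substitute any model the provider serves
    stream = client.chat_completion(
        model="meta-llama/Llama-3.2-11B-Vision-Instruct",
        messages=messages,
        stream=True,
        max_tokens=512,
    )
    for chunk in stream:
        token = chunk.choices[0].delta.content
        if token:
            print(token, end="")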