Nymbo committed
Commit 4264b3e · verified · 1 parent: 385a33a

Update app.py

Files changed (1)
  1. app.py +436 -141
app.py CHANGED
@@ -2,12 +2,56 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import os
 import json

 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")

 def respond(
     message,
     history: list[tuple[str, str]],
     system_message,
     max_tokens,
@@ -16,18 +60,19 @@ def respond(
     frequency_penalty,
     seed,
     provider,
-    custom_api_key, # New parameter for BYOK
     custom_model,
     model_search_term,
     selected_model
 ):
     print(f"Received message: {message}")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Selected provider: {provider}")
-    print(f"Custom API Key provided: {bool(custom_api_key.strip())}") # Log whether a custom key was provided without printing the key
     print(f"Selected model (custom_model): {custom_model}")
     print(f"Model search term: {model_search_term}")
     print(f"Selected model from radio: {selected_model}")
@@ -57,17 +102,50 @@ def respond(
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
         if user_part:
-            messages.append({"role": "user", "content": user_part})
-            print(f"Added user message to context: {user_part}")
         if assistant_part:
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")

-    # Append the latest user message
-    messages.append({"role": "user", "content": message})
-    print("Latest user message appended.")
-
     # Determine which model to use, prioritizing custom_model if provided
     model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
     print(f"Model selected for inference: {model_to_use}")
@@ -90,12 +168,11 @@ def respond(
     # Use the InferenceClient for making the request
     try:
         # Create a generator for the streaming response
-        # The provider is already set when initializing the client
         stream = client.chat_completion(
             model=model_to_use,
             messages=messages,
             stream=True,
-            **parameters # Pass all other parameters
         )

         # Print a starting message for token streaming
@@ -129,94 +206,39 @@ def validate_provider(api_key, provider):
         return gr.update(value="hf-inference")
     return gr.update(value=provider)

-# GRADIO UI
-
-chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", layout="panel")
-print("Chatbot interface created.")
-
-# Basic input components
-system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")

-max_tokens_slider = gr.Slider(
-    minimum=1,
-    maximum=4096,
-    value=512,
-    step=1,
-    label="Max tokens"
-)
-temperature_slider = gr.Slider(
-    minimum=0.1,
-    maximum=4.0,
-    value=0.7,
-    step=0.1,
-    label="Temperature"
-)
-top_p_slider = gr.Slider(
-    minimum=0.1,
-    maximum=1.0,
-    value=0.95,
-    step=0.05,
-    label="Top-P"
-)
-frequency_penalty_slider = gr.Slider(
-    minimum=-2.0,
-    maximum=2.0,
-    value=0.0,
-    step=0.1,
-    label="Frequency Penalty"
-)
-seed_slider = gr.Slider(
-    minimum=-1,
-    maximum=65535,
-    value=-1,
-    step=1,
-    label="Seed (-1 for random)"
-)

-# Provider selection
-providers_list = [
-    "hf-inference", # Default Hugging Face Inference
-    "cerebras", # Cerebras provider
-    "together", # Together AI
-    "sambanova", # SambaNova
-    "novita", # Novita AI
-    "cohere", # Cohere
-    "fireworks-ai", # Fireworks AI
-    "hyperbolic", # Hyperbolic
-    "nebius", # Nebius
 ]

-provider_radio = gr.Radio(
-    choices=providers_list,
-    value="hf-inference",
-    label="Inference Provider",
-    info="[View all models here](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending)"
-)
-
-# New BYOK textbox - Added for the new feature
-byok_textbox = gr.Textbox(
-    value="",
-    label="BYOK (Bring Your Own Key)",
-    info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
-    placeholder="Enter your Hugging Face API token",
-    type="password" # Hide the API key for security
-)
-
-# Custom model box
-custom_model_box = gr.Textbox(
-    value="",
-    label="Custom Model",
-    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
-    placeholder="meta-llama/Llama-3.3-70B-Instruct"
-)
-
-# Model selection components
-model_search_box = gr.Textbox(
-    label="Filter Models",
-    placeholder="Search for a featured model...",
-    lines=1
-)
-
 models_list = [
     "meta-llama/Llama-3.3-70B-Instruct",
     "meta-llama/Llama-3.1-70B-Instruct",
@@ -246,59 +268,337 @@ models_list = [
     "HuggingFaceTB/SmolLM2-360M-Instruct",
     "tiiuae/falcon-7b-instruct",
     "01-ai/Yi-1.5-34B-Chat",
-]
-
-featured_model_radio = gr.Radio(
-    label="Select a model below",
-    choices=models_list,
-    value="meta-llama/Llama-3.3-70B-Instruct",
-    interactive=True
-)
-
-def filter_models(search_term):
-    print(f"Filtering models with search term: {search_term}")
-    filtered = [m for m in models_list if search_term.lower() in m.lower()]
-    print(f"Filtered models: {filtered}")
-    return gr.update(choices=filtered)

-def set_custom_model_from_radio(selected):
     """
-    This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
-    We will update the Custom Model text box with that selection automatically.
     """
-    print(f"Featured model selected: {selected}")
-    return selected

-# Create the Gradio interface
-demo = gr.ChatInterface(
-    fn=respond,
-    additional_inputs=[
-        system_message_box,
-        max_tokens_slider,
-        temperature_slider,
-        top_p_slider,
-        frequency_penalty_slider,
-        seed_slider,
-        provider_radio, # Provider selection
-        byok_textbox, # New BYOK textbox
-        custom_model_box, # Custom Model
-        model_search_box, # Model search box
-        featured_model_radio # Featured model radio
-    ],
-    fill_height=True,
-    chatbot=chatbot,
-    theme="Nymbo/Nymbo_Theme",
 )
-print("ChatInterface object created.")

-with demo:
     # Connect the model filter to update the radio choices
     model_search_box.change(
         fn=filter_models,
         inputs=model_search_box,
         outputs=featured_model_radio
     )
-    print("Model search box change event linked.")

     # Connect the featured model radio to update the custom model box
     featured_model_radio.change(
@@ -306,7 +606,6 @@ with demo:
         inputs=featured_model_radio,
         outputs=custom_model_box
     )
-    print("Featured model radio button change event linked.")

     # Connect the BYOK textbox to validate provider selection
     byok_textbox.change(
@@ -314,7 +613,6 @@ with demo:
         inputs=[byok_textbox, provider_radio],
         outputs=provider_radio
     )
-    print("BYOK textbox change event linked.")

     # Also validate provider when the radio changes to ensure consistency
     provider_radio.change(
@@ -322,10 +620,7 @@ with demo:
         inputs=[byok_textbox, provider_radio],
         outputs=provider_radio
     )
-    print("Provider radio button change event linked.")
-
-print("Gradio interface initialized.")

 if __name__ == "__main__":
-    print("Launching the demo application.")
     demo.launch(show_api=True)
 
 from huggingface_hub import InferenceClient
 import os
 import json
+import base64
+from PIL import Image
+import io

 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")

+def encode_image_to_base64(image):
+    """Convert a PIL Image to a base64 string"""
+    buffered = io.BytesIO()
+    image.save(buffered, format="JPEG")
+    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+    return img_str
+
+def process_uploaded_images(images):
+    """Process uploaded images and return image_url dicts for API submission"""
+    if not images:
+        return []
+
+    image_contents = []
+    for img in images:
+        if isinstance(img, str): # Path to an image
+            try:
+                image = Image.open(img)
+                base64_image = encode_image_to_base64(image)
+                image_contents.append({
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{base64_image}"
+                    }
+                })
+            except Exception as e:
+                print(f"Error processing image {img}: {e}")
+        else: # Already a PIL Image
+            try:
+                base64_image = encode_image_to_base64(img)
+                image_contents.append({
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{base64_image}"
+                    }
+                })
+            except Exception as e:
+                print(f"Error processing uploaded image: {e}")
+
+    return image_contents
+
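A quick sanity check of the two helpers above (a minimal sketch, not part of the commit; it assumes Pillow is installed and the functions are in scope):

    from PIL import Image

    # Hypothetical smoke test: a tiny in-memory image round-trips to a data URI
    img = Image.new("RGB", (8, 8), "red")
    parts = process_uploaded_images([img])
    assert parts[0]["type"] == "image_url"
    assert parts[0]["image_url"]["url"].startswith("data:image/jpeg;base64,")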
 def respond(
     message,
+    images, # New parameter for uploaded images
     history: list[tuple[str, str]],
     system_message,
     max_tokens,
 
     frequency_penalty,
     seed,
     provider,
+    custom_api_key,
     custom_model,
     model_search_term,
     selected_model
 ):
     print(f"Received message: {message}")
+    print(f"Received images: {len(images) if images else 0} image(s)")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Selected provider: {provider}")
+    print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
     print(f"Selected model (custom_model): {custom_model}")
     print(f"Model search term: {model_search_term}")
     print(f"Selected model from radio: {selected_model}")
 
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
+
+        # Process user messages (could be multimodal)
         if user_part:
+            # Check if the user message is already multimodal (from history)
+            if isinstance(user_part, list):
+                # Already in multimodal format, use as is
+                messages.append({"role": "user", "content": user_part})
+                print("Added multimodal user message from history")
+            else:
+                # Simple text message
+                messages.append({"role": "user", "content": user_part})
+                print(f"Added user message to context: {user_part}")
+
+        # Process assistant messages (always text)
         if assistant_part:
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")

+    # Process the current message (could include images)
+    current_message_content = []
+
+    # Add text content if provided
+    if message and message.strip():
+        current_message_content.append({
+            "type": "text",
+            "text": message
+        })
+
+    # Process and add image content if provided
+    if images:
+        image_contents = process_uploaded_images(images)
+        current_message_content.extend(image_contents)
+
+    # Format the final message based on content
+    if current_message_content:
+        if len(current_message_content) == 1 and "type" in current_message_content[0] and current_message_content[0]["type"] == "text":
+            # If only text, use simple string format for compatibility with all models
+            messages.append({"role": "user", "content": current_message_content[0]["text"]})
+            print(f"Added simple text user message: {current_message_content[0]['text']}")
+        else:
+            # If multimodal content, use the array format
+            messages.append({"role": "user", "content": current_message_content})
+            print(f"Added multimodal user message with {len(current_message_content)} parts")
+
     # Determine which model to use, prioritizing custom_model if provided
     model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
     print(f"Model selected for inference: {model_to_use}")
 
     # Use the InferenceClient for making the request
     try:
         # Create a generator for the streaming response
         stream = client.chat_completion(
             model=model_to_use,
             messages=messages,
             stream=True,
+            **parameters
         )

         # Print a starting message for token streaming
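Note that parameters is assembled earlier in respond() and falls outside every hunk shown here, so its exact contents are not visible in this diff. A hedged sketch of the shape it plausibly has, inferred from the function's arguments:

    # Assumption, not shown in this diff: a kwargs dict built from the slider values
    parameters = {
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
    }
    if seed != -1:
        parameters["seed"] = seed  # -1 is the UI's "random" sentinel, so it is omitted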
 
         return gr.update(value="hf-inference")
     return gr.update(value=provider)

+# Function to update featured model list based on search
+def filter_models(search_term):
+    print(f"Filtering models with search term: {search_term}")
+    filtered = [m for m in models_list if search_term.lower() in m.lower()]
+    print(f"Filtered models: {filtered}")
+    return gr.update(choices=filtered)

+def set_custom_model_from_radio(selected):
+    """
+    This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
+    We will update the Custom Model text box with that selection automatically.
+    """
+    print(f"Featured model selected: {selected}")
+    return selected

+# Define multimodal models list
+multimodal_models_list = [
+    "meta-llama/Llama-3.3-70B-Vision-Instruct",
+    "meta-llama/Llama-3.1-8B-Vision-Instruct",
+    "Qwen/Qwen2.5-VL-7B-Chat",
+    "Qwen/Qwen2.5-VL-3B-Chat",
+    "microsoft/Phi-3-vision-instruct",
+    "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "deepseek-ai/DeepSeek-VL-7B-Chat",
+    "01-ai/Yi-VL-6B-Chat",
+    "01-ai/Yi-VL-34B-Chat",
+    "Cohere/command-vision-nightly",
+    "LLaVA/llava-1.6-34b-hf",
+    "fireworks-ai/FireworksBridge-Vision-Alpha",
+    "liuhaotian/llava-v1.6-vicuna-13b",
 ]

+# Add multimodal models to the full model list
 models_list = [
     "meta-llama/Llama-3.3-70B-Instruct",
     "meta-llama/Llama-3.1-70B-Instruct",

     "HuggingFaceTB/SmolLM2-360M-Instruct",
     "tiiuae/falcon-7b-instruct",
     "01-ai/Yi-1.5-34B-Chat",
+] + multimodal_models_list # Add multimodal models to the list

+# Create a custom ChatBot class that will display images
+def format_history_with_images(history):
     """
+    Format history for display in the chatbot, handling multimodal content
     """
+    formatted_history = []
+
+    for user_msg, assistant_msg in history:
+        # Process user message
+        if isinstance(user_msg, list):
+            # Multimodal message
+            formatted_user_msg = []
+            for item in user_msg:
+                if item.get("type") == "text":
+                    formatted_user_msg.append(item["text"])
+                elif item.get("type") == "image_url":
+                    # Extract the base64 image data
+                    img_url = item.get("image_url", {}).get("url", "")
+                    if img_url.startswith("data:image/"):
+                        formatted_user_msg.append((img_url, "image"))
+
+            formatted_history.append((formatted_user_msg, assistant_msg))
+        else:
+            # Regular text message
+            formatted_history.append((user_msg, assistant_msg))
+
+    return formatted_history
+
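To see what format_history_with_images produces, here is a minimal sketch (not part of the commit) of one multimodal turn mapped to the chatbot's display format; the data URI is truncated for illustration:

    turn = [
        {"type": "text", "text": "What is in this picture?"},
        {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}},
    ]
    out = format_history_with_images([(turn, "A red square.")])
    # out == [(["What is in this picture?",
    #           ("data:image/jpeg;base64,...", "image")], "A red square.")]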
+# GRADIO UI

+# Create a custom chatbot that can display images
+chatbot = gr.Chatbot(
+    height=600,
+    show_copy_button=True,
+    placeholder="Select a model and begin chatting",
+    layout="panel"
 )
+print("Chatbot interface created.")
+
+# Create a virtual column layout for the message input area
+with gr.Blocks() as msg_input:
+    with gr.Row():
+        with gr.Column(scale=4):
+            msg = gr.Textbox(
+                placeholder="Enter text here or upload an image",
+                show_label=False,
+                container=False,
+                lines=3
+            )
+        with gr.Column(scale=1, min_width=50):
+            img_upload = gr.Image(
+                type="pil",
+                label="Upload Image",
+                show_label=False,
+                icon="🖼️",
+                container=True,
+                height=50,
+                width=50
+            )
+
+# Basic input components
+system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")

+with gr.Accordion("Model Settings", open=False):
+    with gr.Row():
+        with gr.Column():
+            max_tokens_slider = gr.Slider(
+                minimum=1,
+                maximum=4096,
+                value=512,
+                step=1,
+                label="Max tokens"
+            )
+
+            temperature_slider = gr.Slider(
+                minimum=0.1,
+                maximum=4.0,
+                value=0.7,
+                step=0.1,
+                label="Temperature"
+            )
+
+        with gr.Column():
+            top_p_slider = gr.Slider(
+                minimum=0.1,
+                maximum=1.0,
+                value=0.95,
+                step=0.05,
+                label="Top-P"
+            )
+
+            frequency_penalty_slider = gr.Slider(
+                minimum=-2.0,
+                maximum=2.0,
+                value=0.0,
+                step=0.1,
+                label="Frequency Penalty"
+            )
+
+    with gr.Row():
+        seed_slider = gr.Slider(
+            minimum=-1,
+            maximum=65535,
+            value=-1,
+            step=1,
+            label="Seed (-1 for random)"
+        )
+
+with gr.Accordion("Model Selection", open=False):
+    with gr.Row():
+        with gr.Column():
+            # Provider selection
+            providers_list = [
+                "hf-inference", # Default Hugging Face Inference
+                "cerebras", # Cerebras provider
+                "together", # Together AI
+                "sambanova", # SambaNova
+                "novita", # Novita AI
+                "cohere", # Cohere
+                "fireworks-ai", # Fireworks AI
+                "hyperbolic", # Hyperbolic
+                "nebius", # Nebius
+            ]
+
+            provider_radio = gr.Radio(
+                choices=providers_list,
+                value="hf-inference",
+                label="Inference Provider",
+                info="[View all models here](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending)"
+            )
+
+            # New BYOK textbox - Added for the new feature
+            byok_textbox = gr.Textbox(
+                value="",
+                label="BYOK (Bring Your Own Key)",
+                info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
+                placeholder="Enter your Hugging Face API token",
+                type="password" # Hide the API key for security
+            )
+
+        with gr.Column():
+            # Model selection components
+            model_search_box = gr.Textbox(
+                label="Filter Models",
+                placeholder="Search for a featured model...",
+                lines=1
+            )
+
+            featured_model_radio = gr.Radio(
+                label="Select a model below",
+                choices=models_list,
+                value="meta-llama/Llama-3.3-70B-Vision-Instruct", # Default to a multimodal model
+                interactive=True
+            )
+
+            # Custom model box
+            custom_model_box = gr.Textbox(
+                value="",
+                label="Custom Model",
+                info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
+                placeholder="meta-llama/Llama-3.3-70B-Vision-Instruct"
+            )
+
+    gr.Markdown("[See all multimodal models](https://huggingface.co/models?pipeline_tag=visual-question-answering&sort=trending)")
+
+# Main Gradio interface
+with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
+    gr.Markdown("# 🤖 Serverless-MultiModal-Hub")
+
+    with gr.Row():
+        with gr.Column(scale=3):
+            # Display the chatbot
+            chatbot_interface = chatbot
+
+            # Custom submit function to handle multimodal inputs
+            def submit_message(message, images, history):
+                history = history or []
+
+                # Format the message content based on whether there are images
+                if images:
+                    # Create a multimodal message format for history display
+                    user_msg = []
+                    if message:
+                        user_msg.append({"type": "text", "text": message})
+
+                    # Add each image as an image_url item
+                    for img in images:
+                        if img is not None:
+                            img_base64 = encode_image_to_base64(img)
+                            img_url = f"data:image/jpeg;base64,{img_base64}"
+                            user_msg.append({
+                                "type": "image_url",
+                                "image_url": {"url": img_url}
+                            })
+
+                    # Add to history
+                    history.append([user_msg, None])
+                else:
+                    # Text-only message
+                    if message:
+                        history.append([message, None])
+                    else:
+                        # No content to submit
+                        return history
+
+                return history
+
+            # Create message input
+            with gr.Group():
+                with gr.Row():
+                    with gr.Column(scale=4):
+                        text_input = gr.Textbox(
+                            placeholder="Enter text here",
+                            show_label=False,
+                            container=False,
+                            lines=3
+                        )
+                    with gr.Column(scale=1, min_width=50):
+                        image_input = gr.Image(
+                            type="pil",
+                            label="Upload Image",
+                            show_label=False,
+                            sources=["upload", "clipboard"],
+                            tool="editor",
+                            height=100,
+                            visible=True
+                        )
+
+            # Submit button
+            submit_btn = gr.Button("Submit", variant="primary")
+
+            # Clear button
+            clear_btn = gr.Button("Clear")
+
+        with gr.Column(scale=1):
+            # Put settings here
+            system_message_box = gr.Textbox(
+                value="",
+                placeholder="You are a helpful assistant that can understand images.",
+                label="System Prompt",
+                lines=2
+            )
+
+            with gr.Accordion("Model Selection", open=False):
+                # Provider selection
+                provider_radio = gr.Radio(
+                    choices=providers_list,
+                    value="hf-inference",
+                    label="Inference Provider"
+                )
+
+                # BYOK textbox
+                byok_textbox = gr.Textbox(
+                    value="",
+                    label="API Key",
+                    placeholder="Enter provider API key",
+                    type="password"
+                )
+
+                # Model selection components
+                model_search_box = gr.Textbox(
+                    label="Filter Models",
+                    placeholder="Search models...",
+                    lines=1
+                )
+
+                featured_model_radio = gr.Radio(
+                    label="Models",
+                    choices=models_list,
+                    value="meta-llama/Llama-3.3-70B-Vision-Instruct",
+                    interactive=True
+                )
+
+                custom_model_box = gr.Textbox(
+                    value="",
+                    label="Custom Model",
+                    placeholder="Enter model path"
+                )
+
+                gr.Markdown("[View all multimodal models](https://huggingface.co/models?pipeline_tag=visual-question-answering&sort=trending)")
+
+            with gr.Accordion("Model Settings", open=False):
+                max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max tokens")
+                temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+                top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
+                frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
+                seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
+
+    # Connect the submit button
+    submit_btn.click(
+        fn=submit_message,
+        inputs=[text_input, image_input, chatbot_interface],
+        outputs=[chatbot_interface],
+        queue=False
+    ).then(
+        fn=respond,
+        inputs=[
+            text_input,
+            image_input,
+            chatbot_interface,
+            system_message_box,
+            max_tokens_slider,
+            temperature_slider,
+            top_p_slider,
+            frequency_penalty_slider,
+            seed_slider,
+            provider_radio,
+            byok_textbox,
+            custom_model_box,
+            model_search_box,
+            featured_model_radio
+        ],
+        outputs=[chatbot_interface],
+        queue=True
+    ).then(
+        fn=lambda: (None, None), # Clear inputs after submission
+        inputs=None,
+        outputs=[text_input, image_input]
+    )
+
+    # Clear button functionality
+    clear_btn.click(lambda: None, None, chatbot_interface, queue=False)
+
     # Connect the model filter to update the radio choices
     model_search_box.change(
         fn=filter_models,
         inputs=model_search_box,
         outputs=featured_model_radio
     )

     # Connect the featured model radio to update the custom model box
     featured_model_radio.change(

         inputs=featured_model_radio,
         outputs=custom_model_box
     )

     # Connect the BYOK textbox to validate provider selection
     byok_textbox.change(

         inputs=[byok_textbox, provider_radio],
         outputs=provider_radio
     )

     # Also validate provider when the radio changes to ensure consistency
     provider_radio.change(

         inputs=[byok_textbox, provider_radio],
         outputs=provider_radio
     )

 if __name__ == "__main__":
+    print("Launching Serverless-MultiModal-Hub application.")
     demo.launch(show_api=True)
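For reference, a minimal end-to-end sketch of the streaming multimodal call that the updated respond() builds up to (not part of the commit; the model id is illustrative, the data URI is truncated, and HF_TOKEN must be set in the environment):

    import os
    from huggingface_hub import InferenceClient

    client = InferenceClient(token=os.getenv("HF_TOKEN"), provider="hf-inference")
    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}},
        ],
    }]
    # Illustrative vision-capable model id; substitute any model the provider serves
    stream = client.chat_completion(
        model="meta-llama/Llama-3.2-11B-Vision-Instruct",
        messages=messages,
        stream=True,
        max_tokens=512,
    )
    for chunk in stream:
        token = chunk.choices[0].delta.content
        if token:
            print(token, end="")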