Update app.py
app.py CHANGED
@@ -2,56 +2,12 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import os
 import json
-import base64
-from PIL import Image
-import io
 
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
 
-def encode_image_to_base64(image):
-    """Convert a PIL Image to a base64 string"""
-    buffered = io.BytesIO()
-    image.save(buffered, format="JPEG")
-    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
-    return img_str
-
-def process_uploaded_images(images):
-    """Process uploaded images and return image_url dicts for API submission"""
-    if not images:
-        return []
-
-    image_contents = []
-    for img in images:
-        if isinstance(img, str):  # Path to an image
-            try:
-                image = Image.open(img)
-                base64_image = encode_image_to_base64(image)
-                image_contents.append({
-                    "type": "image_url",
-                    "image_url": {
-                        "url": f"data:image/jpeg;base64,{base64_image}"
-                    }
-                })
-            except Exception as e:
-                print(f"Error processing image {img}: {e}")
-        else:  # Already a PIL Image
-            try:
-                base64_image = encode_image_to_base64(img)
-                image_contents.append({
-                    "type": "image_url",
-                    "image_url": {
-                        "url": f"data:image/jpeg;base64,{base64_image}"
-                    }
-                })
-            except Exception as e:
-                print(f"Error processing uploaded image: {e}")
-
-    return image_contents
-
 def respond(
     message,
-    images,  # New parameter for uploaded images
     history: list[tuple[str, str]],
     system_message,
     max_tokens,
@@ -60,19 +16,18 @@ def respond(
     frequency_penalty,
     seed,
     provider,
-    custom_api_key,
     custom_model,
     model_search_term,
     selected_model
 ):
     print(f"Received message: {message}")
-    print(f"Received images: {len(images) if images else 0} image(s)")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Selected provider: {provider}")
-    print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
     print(f"Selected model (custom_model): {custom_model}")
     print(f"Model search term: {model_search_term}")
     print(f"Selected model from radio: {selected_model}")
@@ -102,50 +57,17 @@ def respond(
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
-
-        # Process user messages (could be multimodal)
         if user_part:
-
-
-            # Already in multimodal format, use as is
-            messages.append({"role": "user", "content": user_part})
-            print("Added multimodal user message from history")
-        else:
-            # Simple text message
-            messages.append({"role": "user", "content": user_part})
-            print(f"Added user message to context: {user_part}")
-
-        # Process assistant messages (always text)
         if assistant_part:
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")
 
-    #
-
-
-
-    if message and message.strip():
-        current_message_content.append({
-            "type": "text",
-            "text": message
-        })
-
-    # Process and add image content if provided
-    if images:
-        image_contents = process_uploaded_images(images)
-        current_message_content.extend(image_contents)
-
-    # Format the final message based on content
-    if current_message_content:
-        if len(current_message_content) == 1 and "type" in current_message_content[0] and current_message_content[0]["type"] == "text":
-            # If only text, use simple string format for compatibility with all models
-            messages.append({"role": "user", "content": current_message_content[0]["text"]})
-            print(f"Added simple text user message: {current_message_content[0]['text']}")
-        else:
-            # If multimodal content, use the array format
-            messages.append({"role": "user", "content": current_message_content})
-            print(f"Added multimodal user message with {len(current_message_content)} parts")
-
     # Determine which model to use, prioritizing custom_model if provided
     model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
     print(f"Model selected for inference: {model_to_use}")
@@ -168,11 +90,12 @@ def respond(
     # Use the InferenceClient for making the request
     try:
         # Create a generator for the streaming response
         stream = client.chat_completion(
             model=model_to_use,
             messages=messages,
             stream=True,
-            **parameters
         )
 
         # Print a starting message for token streaming
@@ -206,39 +129,94 @@ def validate_provider(api_key, provider):
         return gr.update(value="hf-inference")
     return gr.update(value=provider)
 
-#
-def filter_models(search_term):
-    print(f"Filtering models with search term: {search_term}")
-    filtered = [m for m in models_list if search_term.lower() in m.lower()]
-    print(f"Filtered models: {filtered}")
-    return gr.update(choices=filtered)
 
-
-
-    This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
-    We will update the Custom Model text box with that selection automatically.
-    """
-    print(f"Featured model selected: {selected}")
-    return selected
 
-#
-
-
-
-
-
-
-
-    "
-
-
-
-
-
-
 ]
 
-
 models_list = [
     "meta-llama/Llama-3.3-70B-Instruct",
     "meta-llama/Llama-3.1-70B-Instruct",
@@ -268,337 +246,59 @@ models_list = [
     "HuggingFaceTB/SmolLM2-360M-Instruct",
     "tiiuae/falcon-7b-instruct",
     "01-ai/Yi-1.5-34B-Chat",
-]
 
-
-def format_history_with_images(history):
     """
-
     """
-
-
-    for user_msg, assistant_msg in history:
-        # Process user message
-        if isinstance(user_msg, list):
-            # Multimodal message
-            formatted_user_msg = []
-            for item in user_msg:
-                if item.get("type") == "text":
-                    formatted_user_msg.append(item["text"])
-                elif item.get("type") == "image_url":
-                    # Extract the base64 image data
-                    img_url = item.get("image_url", {}).get("url", "")
-                    if img_url.startswith("data:image/"):
-                        formatted_user_msg.append((img_url, "image"))
-
-            formatted_history.append((formatted_user_msg, assistant_msg))
-        else:
-            # Regular text message
-            formatted_history.append((user_msg, assistant_msg))
-
-    return formatted_history
-
-# GRADIO UI
 
-# Create
-
-
-
-
-
 )
-print("
-
-# Create a virtual column layout for the message input area
-with gr.Blocks() as msg_input:
-    with gr.Row():
-        with gr.Column(scale=4):
-            msg = gr.Textbox(
-                placeholder="Enter text here or upload an image",
-                show_label=False,
-                container=False,
-                lines=3
-            )
-        with gr.Column(scale=1, min_width=50):
-            img_upload = gr.Image(
-                type="pil",
-                label="Upload Image",
-                show_label=False,
-                icon="🖼️",
-                container=True,
-                height=50,
-                width=50
-            )
-
-# Basic input components
-system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
 
-with
-    with gr.Row():
-        with gr.Column():
-            max_tokens_slider = gr.Slider(
-                minimum=1,
-                maximum=4096,
-                value=512,
-                step=1,
-                label="Max tokens"
-            )
-
-            temperature_slider = gr.Slider(
-                minimum=0.1,
-                maximum=4.0,
-                value=0.7,
-                step=0.1,
-                label="Temperature"
-            )
-
-        with gr.Column():
-            top_p_slider = gr.Slider(
-                minimum=0.1,
-                maximum=1.0,
-                value=0.95,
-                step=0.05,
-                label="Top-P"
-            )
-
-            frequency_penalty_slider = gr.Slider(
-                minimum=-2.0,
-                maximum=2.0,
-                value=0.0,
-                step=0.1,
-                label="Frequency Penalty"
-            )
-
-    with gr.Row():
-        seed_slider = gr.Slider(
-            minimum=-1,
-            maximum=65535,
-            value=-1,
-            step=1,
-            label="Seed (-1 for random)"
-        )
-
-with gr.Accordion("Model Selection", open=False):
-    with gr.Row():
-        with gr.Column():
-            # Provider selection
-            providers_list = [
-                "hf-inference",  # Default Hugging Face Inference
-                "cerebras",      # Cerebras provider
-                "together",      # Together AI
-                "sambanova",     # SambaNova
-                "novita",        # Novita AI
-                "cohere",        # Cohere
-                "fireworks-ai",  # Fireworks AI
-                "hyperbolic",    # Hyperbolic
-                "nebius",        # Nebius
-            ]
-
-            provider_radio = gr.Radio(
-                choices=providers_list,
-                value="hf-inference",
-                label="Inference Provider",
-                info="[View all models here](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending)"
-            )
-
-            # New BYOK textbox - Added for the new feature
-            byok_textbox = gr.Textbox(
-                value="",
-                label="BYOK (Bring Your Own Key)",
-                info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
-                placeholder="Enter your Hugging Face API token",
-                type="password"  # Hide the API key for security
-            )
-
-        with gr.Column():
-            # Model selection components
-            model_search_box = gr.Textbox(
-                label="Filter Models",
-                placeholder="Search for a featured model...",
-                lines=1
-            )
-
-            featured_model_radio = gr.Radio(
-                label="Select a model below",
-                choices=models_list,
-                value="meta-llama/Llama-3.3-70B-Vision-Instruct",  # Default to a multimodal model
-                interactive=True
-            )
-
-            # Custom model box
-            custom_model_box = gr.Textbox(
-                value="",
-                label="Custom Model",
-                info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
-                placeholder="meta-llama/Llama-3.3-70B-Vision-Instruct"
-            )
-
-            gr.Markdown("[See all multimodal models](https://huggingface.co/models?pipeline_tag=visual-question-answering&sort=trending)")
-
-# Main Gradio interface
-with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
-    gr.Markdown("# 🤖 Serverless-MultiModal-Hub")
-
-    with gr.Row():
-        with gr.Column(scale=3):
-            # Display the chatbot
-            chatbot_interface = chatbot
-
-            # Custom submit function to handle multimodal inputs
-            def submit_message(message, images, history):
-                history = history or []
-
-                # Format the message content based on whether there are images
-                if images:
-                    # Create a multimodal message format for history display
-                    user_msg = []
-                    if message:
-                        user_msg.append({"type": "text", "text": message})
-
-                    # Add each image as an image_url item
-                    for img in images:
-                        if img is not None:
-                            img_base64 = encode_image_to_base64(img)
-                            img_url = f"data:image/jpeg;base64,{img_base64}"
-                            user_msg.append({
-                                "type": "image_url",
-                                "image_url": {"url": img_url}
-                            })
-
-                    # Add to history
-                    history.append([user_msg, None])
-                else:
-                    # Text-only message
-                    if message:
-                        history.append([message, None])
-                    else:
-                        # No content to submit
-                        return history
-
-                return history
-
-            # Create message input
-            with gr.Group():
-                with gr.Row():
-                    with gr.Column(scale=4):
-                        text_input = gr.Textbox(
-                            placeholder="Enter text here",
-                            show_label=False,
-                            container=False,
-                            lines=3
-                        )
-                    with gr.Column(scale=1, min_width=50):
-                        image_input = gr.Image(
-                            type="pil",
-                            label="Upload Image",
-                            show_label=False,
-                            sources=["upload", "clipboard"],
-                            tool="editor",
-                            height=100,
-                            visible=True
-                        )
-
-            # Submit button
-            submit_btn = gr.Button("Submit", variant="primary")
-
-            # Clear button
-            clear_btn = gr.Button("Clear")
-
-        with gr.Column(scale=1):
-            # Put settings here
-            system_message_box = gr.Textbox(
-                value="",
-                placeholder="You are a helpful assistant that can understand images.",
-                label="System Prompt",
-                lines=2
-            )
-
-            with gr.Accordion("Model Selection", open=False):
-                # Provider selection
-                provider_radio = gr.Radio(
-                    choices=providers_list,
-                    value="hf-inference",
-                    label="Inference Provider"
-                )
-
-                # BYOK textbox
-                byok_textbox = gr.Textbox(
-                    value="",
-                    label="API Key",
-                    placeholder="Enter provider API key",
-                    type="password"
-                )
-
-                # Model selection components
-                model_search_box = gr.Textbox(
-                    label="Filter Models",
-                    placeholder="Search models...",
-                    lines=1
-                )
-
-                featured_model_radio = gr.Radio(
-                    label="Models",
-                    choices=models_list,
-                    value="meta-llama/Llama-3.3-70B-Vision-Instruct",
-                    interactive=True
-                )
-
-                custom_model_box = gr.Textbox(
-                    value="",
-                    label="Custom Model",
-                    placeholder="Enter model path"
-                )
-
-                gr.Markdown("[View all multimodal models](https://huggingface.co/models?pipeline_tag=visual-question-answering&sort=trending)")
-
-            with gr.Accordion("Model Settings", open=False):
-                max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max tokens")
-                temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
-                top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
-                frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
-                seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
-
-    # Connect the submit button
-    submit_btn.click(
-        fn=submit_message,
-        inputs=[text_input, image_input, chatbot_interface],
-        outputs=[chatbot_interface],
-        queue=False
-    ).then(
-        fn=respond,
-        inputs=[
-            text_input,
-            image_input,
-            chatbot_interface,
-            system_message_box,
-            max_tokens_slider,
-            temperature_slider,
-            top_p_slider,
-            frequency_penalty_slider,
-            seed_slider,
-            provider_radio,
-            byok_textbox,
-            custom_model_box,
-            model_search_box,
-            featured_model_radio
-        ],
-        outputs=[chatbot_interface],
-        queue=True
-    ).then(
-        fn=lambda: (None, None),  # Clear inputs after submission
-        inputs=None,
-        outputs=[text_input, image_input]
-    )
-
-    # Clear button functionality
-    clear_btn.click(lambda: None, None, chatbot_interface, queue=False)
-
     # Connect the model filter to update the radio choices
     model_search_box.change(
         fn=filter_models,
         inputs=model_search_box,
         outputs=featured_model_radio
     )
 
     # Connect the featured model radio to update the custom model box
     featured_model_radio.change(
@@ -606,6 +306,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         inputs=featured_model_radio,
         outputs=custom_model_box
     )
 
     # Connect the BYOK textbox to validate provider selection
     byok_textbox.change(
@@ -613,6 +314,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         inputs=[byok_textbox, provider_radio],
         outputs=provider_radio
     )
 
     # Also validate provider when the radio changes to ensure consistency
     provider_radio.change(
@@ -620,7 +322,10 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
         inputs=[byok_textbox, provider_radio],
         outputs=provider_radio
     )
 
 if __name__ == "__main__":
-    print("Launching
     demo.launch(show_api=True)
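For reference only (not part of the commit): a minimal sketch of the OpenAI-style multimodal payload that the removed encode_image_to_base64 / process_uploaded_images helpers assembled. The file name sample.jpg and the prompt text are placeholders, not values from the Space.

import base64
import io

from PIL import Image

def encode_image_to_base64(image):
    # Serialize a PIL image to JPEG and base64-encode it, as the removed helper did
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")

# Hypothetical local file, used only to illustrate the data-URL content part
image = Image.open("sample.jpg")
content = [
    {"type": "text", "text": "Describe this picture."},
    {
        "type": "image_url",
        "image_url": {"url": f"data:image/jpeg;base64,{encode_image_to_base64(image)}"},
    },
]
# One multimodal user turn in the chat-completion format the old code targeted
messages = [{"role": "user", "content": content}]

The updated file, shown in the pane below, drops this path entirely and sends the user turn as a plain text string.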
 from huggingface_hub import InferenceClient
 import os
 import json
 
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
 
 def respond(
     message,
     history: list[tuple[str, str]],
     system_message,
     max_tokens,
     frequency_penalty,
     seed,
     provider,
+    custom_api_key,  # New parameter for BYOK
     custom_model,
     model_search_term,
     selected_model
 ):
     print(f"Received message: {message}")
     print(f"History: {history}")
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
     print(f"Selected provider: {provider}")
+    print(f"Custom API Key provided: {bool(custom_api_key.strip())}")  # Log whether a custom key was provided without printing the key
     print(f"Selected model (custom_model): {custom_model}")
     print(f"Model search term: {model_search_term}")
     print(f"Selected model from radio: {selected_model}")
 
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
         if user_part:
+            messages.append({"role": "user", "content": user_part})
+            print(f"Added user message to context: {user_part}")
         if assistant_part:
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")
 
+    # Append the latest user message
+    messages.append({"role": "user", "content": message})
+    print("Latest user message appended.")
+
     # Determine which model to use, prioritizing custom_model if provided
     model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
     print(f"Model selected for inference: {model_to_use}")
 
     # Use the InferenceClient for making the request
     try:
         # Create a generator for the streaming response
+        # The provider is already set when initializing the client
         stream = client.chat_completion(
             model=model_to_use,
             messages=messages,
             stream=True,
+            **parameters  # Pass all other parameters
         )
 
         # Print a starting message for token streaming
 
         return gr.update(value="hf-inference")
     return gr.update(value=provider)
 
+# GRADIO UI
 
+chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", layout="panel")
+print("Chatbot interface created.")
 
+# Basic input components
+system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
+
+max_tokens_slider = gr.Slider(
+    minimum=1,
+    maximum=4096,
+    value=512,
+    step=1,
+    label="Max tokens"
+)
+temperature_slider = gr.Slider(
+    minimum=0.1,
+    maximum=4.0,
+    value=0.7,
+    step=0.1,
+    label="Temperature"
+)
+top_p_slider = gr.Slider(
+    minimum=0.1,
+    maximum=1.0,
+    value=0.95,
+    step=0.05,
+    label="Top-P"
+)
+frequency_penalty_slider = gr.Slider(
+    minimum=-2.0,
+    maximum=2.0,
+    value=0.0,
+    step=0.1,
+    label="Frequency Penalty"
+)
+seed_slider = gr.Slider(
+    minimum=-1,
+    maximum=65535,
+    value=-1,
+    step=1,
+    label="Seed (-1 for random)"
+)
+
+# Provider selection
+providers_list = [
+    "hf-inference",  # Default Hugging Face Inference
+    "cerebras",      # Cerebras provider
+    "together",      # Together AI
+    "sambanova",     # SambaNova
+    "novita",        # Novita AI
+    "cohere",        # Cohere
+    "fireworks-ai",  # Fireworks AI
+    "hyperbolic",    # Hyperbolic
+    "nebius",        # Nebius
 ]
 
+provider_radio = gr.Radio(
+    choices=providers_list,
+    value="hf-inference",
+    label="Inference Provider",
+    info="[View all models here](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending)"
+)
+
+# New BYOK textbox - Added for the new feature
+byok_textbox = gr.Textbox(
+    value="",
+    label="BYOK (Bring Your Own Key)",
+    info="Enter a custom Hugging Face API key here. When empty, only 'hf-inference' provider can be used.",
+    placeholder="Enter your Hugging Face API token",
+    type="password"  # Hide the API key for security
+)
+
+# Custom model box
+custom_model_box = gr.Textbox(
+    value="",
+    label="Custom Model",
+    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
+    placeholder="meta-llama/Llama-3.3-70B-Instruct"
+)
+
+# Model selection components
+model_search_box = gr.Textbox(
+    label="Filter Models",
+    placeholder="Search for a featured model...",
+    lines=1
+)
+
 models_list = [
     "meta-llama/Llama-3.3-70B-Instruct",
     "meta-llama/Llama-3.1-70B-Instruct",
     "HuggingFaceTB/SmolLM2-360M-Instruct",
     "tiiuae/falcon-7b-instruct",
     "01-ai/Yi-1.5-34B-Chat",
+]
+
+featured_model_radio = gr.Radio(
+    label="Select a model below",
+    choices=models_list,
+    value="meta-llama/Llama-3.3-70B-Instruct",
+    interactive=True
+)
+
+def filter_models(search_term):
+    print(f"Filtering models with search term: {search_term}")
+    filtered = [m for m in models_list if search_term.lower() in m.lower()]
+    print(f"Filtered models: {filtered}")
+    return gr.update(choices=filtered)
 
+def set_custom_model_from_radio(selected):
     """
+    This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
+    We will update the Custom Model text box with that selection automatically.
     """
+    print(f"Featured model selected: {selected}")
+    return selected
 
+# Create the Gradio interface
+demo = gr.ChatInterface(
+    fn=respond,
+    additional_inputs=[
+        system_message_box,
+        max_tokens_slider,
+        temperature_slider,
+        top_p_slider,
+        frequency_penalty_slider,
+        seed_slider,
+        provider_radio,        # Provider selection
+        byok_textbox,          # New BYOK textbox
+        custom_model_box,      # Custom Model
+        model_search_box,      # Model search box
+        featured_model_radio   # Featured model radio
+    ],
+    fill_height=True,
+    chatbot=chatbot,
+    theme="Nymbo/Nymbo_Theme",
 )
+print("ChatInterface object created.")
 
+with demo:
     # Connect the model filter to update the radio choices
     model_search_box.change(
         fn=filter_models,
         inputs=model_search_box,
         outputs=featured_model_radio
     )
+    print("Model search box change event linked.")
 
     # Connect the featured model radio to update the custom model box
     featured_model_radio.change(
         inputs=featured_model_radio,
         outputs=custom_model_box
     )
+    print("Featured model radio button change event linked.")
 
     # Connect the BYOK textbox to validate provider selection
     byok_textbox.change(
         inputs=[byok_textbox, provider_radio],
         outputs=provider_radio
     )
+    print("BYOK textbox change event linked.")
 
     # Also validate provider when the radio changes to ensure consistency
     provider_radio.change(
         inputs=[byok_textbox, provider_radio],
         outputs=provider_radio
     )
+    print("Provider radio button change event linked.")
+
+print("Gradio interface initialized.")
 
 if __name__ == "__main__":
+    print("Launching the demo application.")
     demo.launch(show_api=True)
|