import gradio as gr
from openai import OpenAI
import os

# Load the Hugging Face access token from environment variables
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")

# Initialize the OpenAI client pointing to the Hugging Face Inference API
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")

# Define the main function that handles chat responses
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model,  # Input from the Custom Model textbox
    featured_model  # Input from the Featured Model radio buttons
):

    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")
    print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
    print(f"Custom model input: '{custom_model}'")
    print(f"Selected featured model: {featured_model}") # Log the featured model selection

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    # Start constructing the message list for the API call with the system message
    messages = [{"role": "system", "content": system_message}]
    print("Initial messages array constructed.")

    # Add the conversation history to the messages list
    for val in history:
        user_part = val[0]
        assistant_part = val[1]
        if user_part:
            messages.append({"role": "user", "content": user_part})
            print(f"Added user message to context: {user_part}")
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})
            print(f"Added assistant message to context: {assistant_part}")

    # Add the latest user message to the list
    messages.append({"role": "user", "content": message})
    print("Latest user message appended.")

    # Determine the model to use: prioritize the custom model box if it is
    # filled in; otherwise fall back to the selected featured model.
    custom_model_stripped = custom_model.strip() # Remove leading/trailing whitespace
    if custom_model_stripped != "":
        model_to_use = custom_model_stripped # Use custom model if provided
        print(f"Using custom model: {model_to_use}")
    else:
        model_to_use = featured_model # Use the selected featured model
        print(f"Using selected featured model: {model_to_use}")


    # Initialize an empty string to accumulate the response tokens
    response = ""
    print("Sending request to Hugging Face Inference API.")

    # Stream the response from the API
    for message_chunk in client.chat.completions.create(
        model=model_to_use,          # Use the determined model
        max_tokens=max_tokens,       # Set maximum tokens for the response
        stream=True,                 # Enable streaming responses
        temperature=temperature,     # Set sampling temperature
        top_p=top_p,                 # Set nucleus sampling probability
        frequency_penalty=frequency_penalty, # Set frequency penalty
        seed=seed,                   # Set random seed (if provided)
        messages=messages,           # Pass the constructed message history
    ):
        # Get the text content from the current chunk
        token_text = message_chunk.choices[0].delta.content
        # Append the token text to the response string (if it's not None)
        if token_text:
            print(f"Received token: {token_text}")
            response += token_text
            yield response # Yield the partial response back to Gradio for live updates

    print("Completed response generation.")

# --- GRADIO UI ---

# Create the main chatbot display area
chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", layout="panel")
print("Chatbot interface created.")

# Create the System Prompt input box
system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")

# Create sliders for model parameters
max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")

# Create the Custom Model input box
custom_model_box = gr.Textbox(
    value="", # Default to empty
    label="Custom Model",
    info="(Optional) Provide a custom Hugging Face model path. Overrides the featured model selection below.",
    placeholder="e.g., username/my-custom-model" # Updated placeholder
)

# Define the list of featured models
models_list = [
    "meta-llama/Llama-3.3-70B-Instruct", # Default selected model
    "meta-llama/Llama-3.1-70B-Instruct",
    "meta-llama/Llama-3.0-70B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.2-1B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
    "NousResearch/Hermes-3-Llama-3.1-8B",
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
    "mistralai/Mistral-Nemo-Instruct-2407",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "Qwen/Qwen3-235B-A22B",
    "Qwen/Qwen3-32B",
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/Qwen2.5-3B-Instruct",
    "Qwen/Qwen2.5-0.5B-Instruct",
    "Qwen/QwQ-32B",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "microsoft/Phi-3.5-mini-instruct",
    "microsoft/Phi-3-mini-128k-instruct",
    "microsoft/Phi-3-mini-4k-instruct",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "HuggingFaceH4/zephyr-7b-beta",
    "HuggingFaceTB/SmolLM2-360M-Instruct",
    "tiiuae/falcon-7b-instruct",
    "01-ai/Yi-1.5-34B-Chat",
]
print("Models list initialized.")

# Create the radio button selector for featured models
featured_model_radio = gr.Radio(
    label="Select a Featured Model", # Changed label slightly
    choices=models_list,
    value="meta-llama/Llama-3.3-70B-Instruct", # Set the default selection
    interactive=True
)
print("Featured models radio button created.")


# --- Create the main Chat Interface ---
demo = gr.ChatInterface(
    fn=respond, # The function to call when a message is sent
    additional_inputs=[ # List of input components passed to the 'respond' function
        system_message_box,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
        frequency_penalty_slider,
        seed_slider,
        custom_model_box,
        featured_model_radio # Pass the radio button selection
    ],
    fill_height=True, # Make the interface fill the available height
    chatbot=chatbot, # Use the predefined chatbot component
    theme="Nymbo/Nymbo_Theme", # Apply a theme
)
print("ChatInterface object created.")

# --- Add Model Filtering Controls within the Interface ---
with demo: # Use the ChatInterface as a context manager to add elements
    with gr.Accordion("Featured Model Search", open=False): # Group the search control in an accordion
        # --- Featured Model Selection ---
        gr.Markdown("### Featured Models") # Section title
        model_search_box = gr.Textbox(
            label="Filter Models",
            placeholder="Search featured models...",
            lines=1
        )
        print("Model search box created.")

        # The featured-model radio buttons, the custom model box, and the
        # parameter sliders are already passed to gr.ChatInterface through
        # `additional_inputs`, so ChatInterface renders them itself in its
        # "Additional Inputs" accordion and they must not be rendered here a
        # second time.


        # --- Event Listeners ---

        # Function to filter the radio button choices based on search input
        def filter_models(search_term):
            print(f"Filtering models with search term: {search_term}")
            # List comprehension to find models matching the search term (case-insensitive)
            filtered = [m for m in models_list if search_term.lower() in m.lower()]
            print(f"Filtered models: {filtered}")
            # Update the 'choices' property of the radio button component
            return gr.update(choices=filtered)
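
        # For example, filter_models("qwen") returns an update whose choices
        # contain only the Qwen entries from models_list.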

        # Link the search box's 'change' event to the filter function
        model_search_box.change(
            fn=filter_models,          # Function to call
            inputs=model_search_box,   # Input component triggering the event
            outputs=featured_model_radio # Output component to update
        )
        print("Model search box change event linked.")


print("Gradio interface layout defined.")

# --- Launch the Application ---
if __name__ == "__main__":
    print("Launching the Gradio demo application.")
    # Launch the Gradio app with API endpoint enabled
    demo.launch(show_api=True)
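    # Note (an optional tweak, not needed on Hugging Face Spaces): when
    # hosting elsewhere, e.g. in a container, launching with
    # demo.launch(show_api=True, server_name="0.0.0.0", server_port=7860)
    # makes the app reachable from outside the host.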