import gradio as gr
from openai import OpenAI
import os

# Retrieve the access token from the environment variable
ACCESS_TOKEN = os.getenv("HF_TOKEN")

# Initialize the OpenAI-compatible client against the Hugging Face Inference API
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)


def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
):
    # Log the incoming request
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System Message: {system_message}")
    print(f"Max Tokens: {max_tokens}, Temperature: {temperature}, Top P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")

    # Convert seed to None if -1 (random)
    if seed == -1:
        seed = None

    # Construct the messages list for the API
    messages = [{"role": "system", "content": system_message}]

    # Add conversation history to the context
    for user_message, assistant_message in history:
        if user_message:
            messages.append({"role": "user", "content": user_message})
            print(f"Added user message: {user_message}")
        if assistant_message:
            messages.append({"role": "assistant", "content": assistant_message})
            print(f"Added assistant message: {assistant_message}")

    # Append the latest message
    messages.append({"role": "user", "content": message})

    # Accumulate the streamed response
    response = ""

    # Make the streaming API request
    for chunk in client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct",
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        stream=True,
    ):
        # Streamed chunks carry the token text in `delta`, not `message`
        token = chunk.choices[0].delta.content
        if token is not None:
            response += token
            yield response


# Create the Gradio Chatbot component
chatbot = gr.Chatbot(height=600)

# Define the Gradio ChatInterface. ChatInterface supplies the message textbox
# and conversation history itself, so only the extra controls are passed in,
# via `additional_inputs` (ChatInterface has no `inputs` parameter).
demo = gr.ChatInterface(
    fn=respond,
    chatbot=chatbot,
    additional_inputs=[
        gr.Textbox(label="System Message"),
        gr.Slider(minimum=10, maximum=200, step=1, label="Max Tokens"),
        gr.Slider(minimum=0, maximum=2, step=0.1, label="Temperature"),
        gr.Slider(minimum=0, maximum=1, step=0.05, label="Top P"),
        gr.Slider(minimum=-2, maximum=2, step=0.1, label="Frequency Penalty"),
        gr.Slider(minimum=-1, maximum=1000000, step=1, label="Seed (-1 for random)"),
    ],
    theme="Nymbo/Nymbo_Theme",
)

# ChatInterface subclasses Blocks, so additional components can be appended
# by re-entering the demo's context; components created outside it would
# never be attached to the app.
with demo:
    # Create the "Featured Models" accordion
    with gr.Accordion("Featured Models", open=True) as featured_models:
        # Textbox for searching models
        model_search = gr.Textbox(label="Filter Models")

        # List of featured models (duplicate entries removed)
        models = [
            "meta-llama/Llama-3.3-70B-Instruct",
            "meta-llama/Llama-2-70B-Chat-hf",
            "TheBloke/Llama-2-13B-Chat-GGML",
            "TheBloke/Llama-2-70B-Chat-GGML",
            "TheBloke/Llama-2-13B-Chat-GGML-v2",
            "TheBloke/Llama-2-70B-Chat-GGML-v2",
            "TheBloke/Llama-2-70B-Chat-HF-API-compatible-GGML",
            "TheBloke/Llama-2-70b-chat-hf",
            "TheBloke/Llama-2-70B-Chat-GGML-v2-32K",
            "TheBloke/Llama-2-13B-Chat-GGML-v2-32K",
            # Add more models as needed...
        ]

        # Radio buttons for selecting a model
        model_radio = gr.Radio(choices=models, label="Select a Model")

        # Update the model list based on search input
        def filter_models(search_term):
            filtered_models = [
                model for model in models if search_term.lower() in model.lower()
            ]
            return gr.update(choices=filtered_models)

        # Update the model list when the search box is used
        model_search.change(filter_models, inputs=model_search, outputs=model_radio)

    # Create a "Custom Model" textbox
    custom_model = gr.Textbox(label="Custom Model", placeholder="Hugging Face model path")

    # Create the "Information" tab
    with gr.Tab("Information"):
        # Featured Models accordion
        with gr.Accordion("Featured Models", open=False):
            gr.Markdown(
                """
                # Featured Models

                Here's a list of some popular models available on Hugging Face:

                - meta-llama/Llama-3.3-70B-Instruct
                - meta-llama/Llama-2-70B-Chat-hf
                - TheBloke/Llama-2-13B-Chat-GGML
                - TheBloke/Llama-2-70B-Chat-GGML
                - TheBloke/Llama-2-13B-Chat-GGML-v2
                - TheBloke/Llama-2-70B-Chat-GGML-v2
                - ... (and many more)

                You can search and select a model from the list above, or use your own custom model path.
                """
            )

        # Parameters Overview accordion
        with gr.Accordion("Parameters Overview", open=False):
            gr.Markdown(
                """
                # Parameters Overview

                Here's a brief explanation of the parameters you can adjust:

                - **Max Tokens**: The maximum number of tokens to generate in the response.
                - **Temperature**: Controls the randomness of the output. Higher values make the output more random.
                - **Top P**: Also known as nucleus sampling; only the smallest set of most-probable tokens whose cumulative probability reaches Top P is kept for sampling. Lower values make the output more focused.
                - **Frequency Penalty**: Penalizes tokens in proportion to how often they have already appeared, discouraging repetition.
                - **Seed**: A fixed seed for reproducibility. Use -1 for a random seed.

                Feel free to experiment with these settings to achieve the desired output.
                """
            )

# Launch the Gradio interface
demo.launch(share=True)
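# --- Usage sketch (a hedged example; the shell commands and the filename
# `app.py` are assumptions, not part of the app above) ---
#
#   $ pip install gradio openai
#   $ export HF_TOKEN=hf_...   # a Hugging Face access token with Inference API access
#   $ python app.py
#
# With share=True, launch() prints both a local URL and a temporary public
# *.gradio.live link for the running chat UI.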