import os
import gradio as gr
from huggingface_hub import InferenceClient
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
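
# Fail fast when the token is missing. A minimal guard, assuming the token
# is provided via a local .env file (HF_TOKEN=...) or the shell environment.
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN is not set; add it to .env or the environment.")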

# Initialize Hugging Face Inference Client
client = InferenceClient(
    model="Qwen/Qwen2.5-Coder-7B-Instruct",
    token=HF_TOKEN
)
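
# Note: respond() below also passes the model name per call; it matches
# this client-wide default, so either setting alone would suffice.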

# System prompt for coding assistant
system_message = (
    "You are a helpful and experienced coding assistant specialized in web development. "
    "Help the user by generating complete and functional code for building websites. "
    "You can provide HTML, CSS, JavaScript, and backend code (like Flask, Node.js, etc.) "
    "based on their requirements."
)

# Streaming chatbot logic
def respond(message, history):
    # Prepare messages with system prompt
    messages = [{"role": "system", "content": system_message}]
    # History arrives in the OpenAI-style "messages" format (a list of
    # {"role", "content"} dicts) because the Chatbot/ChatInterface below
    # are created with type="messages", so append entries as-is rather
    # than unpacking (user, assistant) tuples.
    for msg in history:
        messages.append({"role": msg["role"], "content": msg["content"]})
    messages.append({"role": "user", "content": message})

    # Stream the response; Gradio expects each yield to be the full
    # accumulated text so far, not just the newest token.
    response = ""
    for chunk in client.chat.completions.create(
        model="Qwen/Qwen2.5-Coder-7B-Instruct",
        messages=messages,
        max_tokens=2048,
        temperature=0.7,
        top_p=0.95,
        stream=True,
    ):
        # Some stream chunks carry no choices; skip them
        if not chunk.choices:
            continue

        # delta.content may be None on role-only or final chunks
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response
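
# Illustrative only: respond() can also be exercised outside Gradio. The
# history below mimics what ChatInterface passes when type="messages".
#
#   history = [{"role": "user", "content": "Hi"},
#              {"role": "assistant", "content": "Hello! What should we build?"}]
#   for partial in respond("Write a minimal HTML landing page", history):
#       print(partial)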

# Create Gradio interface
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")  # Use modern message format
    gr.ChatInterface(fn=respond, chatbot=chatbot, type="messages")  # Match format

# Launch app
if __name__ == "__main__":
    demo.launch()