"""Gradio front end for a health-oriented chatbot backed by a hosted LLM."""

import gradio as gr
from huggingface_hub import InferenceClient

# Client for the hosted inference endpoint; no model weights are loaded locally.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream the assistant's reply to ``message``.

    Args:
        message: The latest user message.
        history: Sequence of ``(user_text, assistant_text)`` pairs from earlier
            turns, or ``None`` when no history has been stored yet.
        system_message: Content of the leading system prompt.
        max_tokens: Maximum number of tokens to generate (``None`` defers to
            the client's default).
        temperature: Sampling temperature forwarded to the client.
        top_p: Nucleus-sampling probability mass forwarded to the client.

    Yields:
        The response text accumulated so far, once per streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    # A "state" input holds None until something has been stored in it, so
    # treat a missing history as an empty conversation instead of crashing.
    for turn in history or []:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    # The "number" input delivers floats, but the token limit is an integer.
    token_limit = int(max_tokens) if max_tokens is not None else None

    response = ""
    # `chunk` (not `message`) so the user-message parameter is not shadowed.
    for chunk in client.chat_completion(
        messages,
        max_tokens=token_limit,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            # Nothing to append from a chunk that carries no choices.
            continue
        # Some chunks (e.g. role-only or final ones) carry no text content.
        token = chunk.choices[0].delta.content
        response += token or ""
        yield response


# Custom styling for the page.
# NOTE(review): `css` is defined but never passed to the interface below, so
# it currently has no effect - confirm whether it should be wired in.
css = """
body {
    background-color: #f9f9f9;
}
.gradio-container {
    max-width: 800px;
    margin: 40px auto;
    padding: 20px;
    border: 1px solid #ddd;
    border-radius: 10px;
    box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}
.gradio-input {
    background-color: #fff;
    border: 1px solid #ccc;
    padding: 10px;
    border-radius: 10px;
}
.gradio-button {
    background-color: #3498db;
    color: #fff;
    border: none;
    padding: 10px 20px;
    border-radius: 10px;
    cursor: pointer;
}
.gradio-button:hover {
    background-color: #2980b9;
}
"""

# Create the chat interface. Inputs map positionally onto respond()'s
# parameters: message, history, system_message, max_tokens, temperature, top_p.
demo = gr.Interface(
    fn=respond,
    inputs=["text", "state", "text", "number", "number", "number"],
    outputs="text",
    title="NVS AI: Health Conversational Chatbot",
    description="Get answers to your health-related questions!",
)

demo.launch()