import gradio as gr
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Load the OpenVINO-optimized model and its tokenizer
model_id = "HelloSun/Qwen2.5-0.5B-Instruct-openvino"
model = OVModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Build the text-generation pipeline
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


def respond(message, history):
    # Merge the current message with the last assistant reply (if any);
    # history is a list of (user, assistant) pairs in Gradio's tuple format
    input_text = message if not history else history[-1][1] + " " + message
    # Generate the model's reply; max_new_tokens bounds only the newly
    # generated text, and return_full_text=False strips the prompt from it
    response = pipe(
        input_text,
        max_new_tokens=100,
        num_return_sequences=1,
        return_full_text=False,
    )
    # gr.ChatInterface expects just the reply string; it tracks history itself
    return response[0]["generated_text"]


# Set up the Gradio chat interface
demo = gr.ChatInterface(fn=respond)

if __name__ == "__main__":
    demo.launch()