m96tkmok committed on
Commit c4ab49c · verified · 1 Parent(s): aad4507

Update app.py

Fallback to Gradio

Files changed (1)
  1. app.py +60 -114
app.py CHANGED
@@ -1,125 +1,71 @@
- import streamlit as st
  import os

- from typing import Iterator
  from huggingface_hub import InferenceClient

  HF_TOKEN = os.environ.get("HF_TOKEN", None)

- # Configure page settings
- st.set_page_config(
-     page_title="LLM Taiwan Chat",
-     page_icon="💬",
-     layout="centered"
- )

- # Initialize session state for chat history and system prompt
- if "messages" not in st.session_state:
-     st.session_state.messages = []
- if "system_prompt" not in st.session_state:
-     st.session_state.system_prompt = "你是一個產自台灣的聊天機械人, 你以台灣本地人的身份, 使用正體中文回答問題."
- if "temperature" not in st.session_state:
-     st.session_state.temperature = 0.2
- if "top_p" not in st.session_state:
-     st.session_state.top_p = 0.95
-
- ## model="lianghsun/Llama-3.2-Taiwan-3B" to meta-llama/Llama-3.2-3B-Instruct
-
- def stream_chat(prompt: str) -> Iterator[str]:
-     """Stream chat responses from the LLM API"""
-
-     client = InferenceClient(model="meta-llama/Llama-3.2-3B-Instruct", timeout=30, token=HF_TOKEN)
-
-
-     messages = []
-     if st.session_state.system_prompt:
-         messages.append({"role": "system", "content": st.session_state.system_prompt})
-     messages.extend(st.session_state.messages)
-
-     stream = client.chat.completions.create(
-         messages=messages,
-         model="meta-llama/Llama-3.2-3B-Instruct",
          stream=True,
-         temperature=st.session_state.temperature,
-         top_p=st.session_state.top_p
-     )
-
-     for chunk in stream:
-         if chunk.choices[0].delta.content is not None:
-             yield chunk.choices[0].delta.content
-
- def clear_chat_history():
-     """Clear all chat messages and reset system prompt"""
-     st.session_state.messages = []
-     st.session_state.system_prompt = ""
-
- def main():
-     st.title("💬 LLM Taiwan Chat")
-
-     # Add a clear chat button with custom styling
-     col1, col2 = st.columns([6, 1])
-     with col2:
-         if st.button("🗑️", type="secondary", use_container_width=True):
-             clear_chat_history()
-             st.rerun()
-
-     # Advanced options in expander
-     with st.expander("進階選項 ⚙️", expanded=False):
-         # System prompt input
-         system_prompt = st.text_area(
-             "System Prompt 設定:",
-             value=st.session_state.system_prompt,
-             help="設定 system prompt 來定義 AI 助理的行為和角色。開始對話後將無法修改。",
-             height=100,
-             disabled=len(st.session_state.messages) > 0 # 當有對話時設為唯讀
-         )
-         if not st.session_state.messages and system_prompt != st.session_state.system_prompt:
-             st.session_state.system_prompt = system_prompt
-
-         st.session_state.temperature = st.slider(
-             "Temperature",
-             min_value=0.0,
-             max_value=2.0,
-             value=st.session_state.temperature,
-             step=0.1,
-             help="較高的值會使輸出更加隨機,較低的值會使其更加集中和確定。"
-         )
-         st.session_state.top_p = st.slider(
-             "Top P",
-             min_value=0.1,
-             max_value=1.0,
-             value=st.session_state.top_p,
              step=0.05,
-             help="控制模型輸出的多樣性,較低的值會使輸出更加保守。"
-         )
-
-     # Display chat messages
-     for message in st.session_state.messages:
-         with st.chat_message(message["role"]):
-             st.write(message["content"])
-
-     # Chat input
-     if prompt := st.chat_input("輸入您的訊息..."):
-         # Add user message to chat history
-         st.session_state.messages.append({"role": "user", "content": prompt})
-
-         # Display user message
-         with st.chat_message("user"):
-             st.write(prompt)
-
-         # Display assistant response with streaming
-         with st.chat_message("assistant"):
-             response_placeholder = st.empty()
-             full_response = ""
-
-             # Stream the response
-             for response_chunk in stream_chat(prompt):
-                 full_response += response_chunk
-                 response_placeholder.markdown(full_response + "▌")
-             response_placeholder.markdown(full_response)
-
-             # Add assistant response to chat history
-             st.session_state.messages.append({"role": "assistant", "content": full_response})

  if __name__ == "__main__":
-     main()
 
+ import gradio as gr
  import os

  from huggingface_hub import InferenceClient

+ """
+ For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+ """
+
  HF_TOKEN = os.environ.get("HF_TOKEN", None)


+ ### Model used changes from HuggingFaceH4/zephyr-7b-beta to meta-llama/Llama-3.2-3B-Instruct
+ client = InferenceClient("lianghsun/Llama-3.2-Taiwan-3B", timeout=30, token=HF_TOKEN)
+
+
+ def respond(
+     message,
+     history: list[tuple[str, str]],
+     system_message,
+     max_tokens,
+     temperature,
+     top_p,
+ ):
+     messages = [{"role": "system", "content": system_message}]
+
+     for val in history:
+         if val[0]:
+             messages.append({"role": "user", "content": val[0]})
+         if val[1]:
+             messages.append({"role": "assistant", "content": val[1]})
+
+     messages.append({"role": "user", "content": message})
+
+     response = ""
+
+     for message in client.chat_completion(
+         messages,
+         max_tokens=max_tokens,
          stream=True,
+         temperature=temperature,
+         top_p=top_p,
+     ):
+         token = message.choices[0].delta.content
+
+         response += token
+         yield response
+
+
+ """
+ For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+ """
+ demo = gr.ChatInterface(
+     respond,
+     additional_inputs=[
+         gr.Textbox(value="你是一個產自台灣的聊天機械人, 你以台灣本地人的身份, 使用正體中文回答問題.", label="System message"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=0.95,
              step=0.05,
+             label="Top-p (nucleus sampling)",
+         ),
+     ],
+ )
+

  if __name__ == "__main__":
+     demo.launch()
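Because the Gradio rewrite funnels everything through the plain `respond` generator, the streaming path can be smoke-tested without launching the web UI. The snippet below is a minimal sketch rather than part of the commit: the file name `test_respond.py`, the English system prompt, and the sampling values are illustrative, and it assumes HF_TOKEN is set in the environment and that the Inference API endpoint for lianghsun/Llama-3.2-Taiwan-3B is reachable.

# test_respond.py — quick local check of the streaming generator defined in app.py.
# Illustrative sketch only; importing app builds the gr.ChatInterface but does not
# launch it, since demo.launch() sits behind the __main__ guard.
from app import respond

system_message = "You are a chatbot from Taiwan; answer in Traditional Chinese."  # stand-in prompt
history = []        # no previous (user, assistant) turns yet
final_reply = ""

# respond() yields the accumulated reply after each streamed chunk, which is the
# contract gr.ChatInterface expects from a generator-based chat function.
for partial in respond("Hello!", history, system_message, 512, 0.7, 0.95):
    final_reply = partial

print(final_reply)

Each intermediate `partial` is what the ChatInterface would render as the reply grows; keeping only the last one prints the finished answer.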