Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,16 +2,18 @@ import gradio as gr
|
|
2 |
from openai import OpenAI
|
3 |
import os
|
4 |
|
|
|
5 |
ACCESS_TOKEN = os.getenv("HF_TOKEN")
|
6 |
print("Access token loaded.")
|
7 |
|
|
|
8 |
client = OpenAI(
|
9 |
base_url="https://api-inference.huggingface.co/v1/",
|
10 |
api_key=ACCESS_TOKEN,
|
11 |
)
|
12 |
print("OpenAI client initialized.")
|
13 |
|
14 |
-
|
15 |
def respond(
|
16 |
message,
|
17 |
history: list[tuple[str, str]],
|
@@ -21,7 +23,8 @@ def respond(
|
|
21 |
top_p,
|
22 |
frequency_penalty,
|
23 |
seed,
|
24 |
-
custom_model
|
|
|
25 |
):
|
26 |
|
27 |
print(f"Received message: {message}")
|
@@ -29,16 +32,18 @@ def respond(
|
|
29 |
print(f"System message: {system_message}")
|
30 |
print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
|
31 |
print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
|
32 |
-
print(f"
|
|
|
33 |
|
34 |
# Convert seed to None if -1 (meaning random)
|
35 |
if seed == -1:
|
36 |
seed = None
|
37 |
|
|
|
38 |
messages = [{"role": "system", "content": system_message}]
|
39 |
print("Initial messages array constructed.")
|
40 |
|
41 |
-
# Add conversation history to the
|
42 |
for val in history:
|
43 |
user_part = val[0]
|
44 |
assistant_part = val[1]
|
@@ -49,97 +54,119 @@ def respond(
|
|
49 |
messages.append({"role": "assistant", "content": assistant_part})
|
50 |
print(f"Added assistant message to context: {assistant_part}")
|
51 |
|
52 |
-
#
|
53 |
messages.append({"role": "user", "content": message})
|
54 |
print("Latest user message appended.")
|
55 |
|
56 |
-
#
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
-
#
|
61 |
response = ""
|
62 |
-
print("Sending request to
|
63 |
|
|
|
64 |
for message_chunk in client.chat.completions.create(
|
65 |
-
model=model_to_use,
|
66 |
-
max_tokens=max_tokens,
|
67 |
-
stream=True,
|
68 |
-
temperature=temperature,
|
69 |
-
top_p=top_p,
|
70 |
-
frequency_penalty=frequency_penalty,
|
71 |
-
seed=seed,
|
72 |
-
messages=messages,
|
73 |
):
|
|
|
74 |
token_text = message_chunk.choices[0].delta.content
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
78 |
|
79 |
print("Completed response generation.")
|
80 |
|
81 |
-
# GRADIO UI
|
82 |
|
|
|
83 |
chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", layout="panel")
|
84 |
print("Chatbot interface created.")
|
85 |
|
|
|
86 |
system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
|
87 |
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
)
|
95 |
-
temperature_slider = gr.Slider(
|
96 |
-
minimum=0.1,
|
97 |
-
maximum=4.0,
|
98 |
-
value=0.7,
|
99 |
-
step=0.1,
|
100 |
-
label="Temperature"
|
101 |
-
)
|
102 |
-
top_p_slider = gr.Slider(
|
103 |
-
minimum=0.1,
|
104 |
-
maximum=1.0,
|
105 |
-
value=0.95,
|
106 |
-
step=0.05,
|
107 |
-
label="Top-P"
|
108 |
-
)
|
109 |
-
frequency_penalty_slider = gr.Slider(
|
110 |
-
minimum=-2.0,
|
111 |
-
maximum=2.0,
|
112 |
-
value=0.0,
|
113 |
-
step=0.1,
|
114 |
-
label="Frequency Penalty"
|
115 |
-
)
|
116 |
-
seed_slider = gr.Slider(
|
117 |
-
minimum=-1,
|
118 |
-
maximum=65535,
|
119 |
-
value=-1,
|
120 |
-
step=1,
|
121 |
-
label="Seed (-1 for random)"
|
122 |
-
)
|
123 |
|
124 |
-
#
|
125 |
custom_model_box = gr.Textbox(
|
126 |
-
value="",
|
127 |
label="Custom Model",
|
128 |
-
info="(Optional) Provide a custom Hugging Face model path. Overrides
|
129 |
-
placeholder="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
)
|
|
|
131 |
|
132 |
-
def set_custom_model_from_radio(selected):
|
133 |
-
"""
|
134 |
-
This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
|
135 |
-
We will update the Custom Model text box with that selection automatically.
|
136 |
-
"""
|
137 |
-
print(f"Featured model selected: {selected}")
|
138 |
-
return selected
|
139 |
|
|
|
|
|
140 |
demo = gr.ChatInterface(
|
141 |
-
fn=respond,
|
142 |
-
additional_inputs=[
|
143 |
system_message_box,
|
144 |
max_tokens_slider,
|
145 |
temperature_slider,
|
@@ -147,84 +174,73 @@ demo = gr.ChatInterface(
|
|
147 |
frequency_penalty_slider,
|
148 |
seed_slider,
|
149 |
custom_model_box,
|
|
|
150 |
],
|
151 |
-
fill_height=True,
|
152 |
-
chatbot=chatbot,
|
153 |
-
theme="Nymbo/Nymbo_Theme",
|
154 |
)
|
155 |
print("ChatInterface object created.")
|
156 |
|
157 |
-
|
158 |
-
|
|
|
|
|
|
|
159 |
model_search_box = gr.Textbox(
|
160 |
label="Filter Models",
|
161 |
-
placeholder="Search
|
162 |
lines=1
|
163 |
)
|
164 |
print("Model search box created.")
|
165 |
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
"mistralai/Mistral-7B-Instruct-v0.3",
|
178 |
-
"mistralai/Mistral-7B-Instruct-v0.2",
|
179 |
-
"Qwen/Qwen3-235B-A22B",
|
180 |
-
"Qwen/Qwen3-32B",
|
181 |
-
"Qwen/Qwen2.5-72B-Instruct",
|
182 |
-
"Qwen/Qwen2.5-3B-Instruct",
|
183 |
-
"Qwen/Qwen2.5-0.5B-Instruct",
|
184 |
-
"Qwen/QwQ-32B",
|
185 |
-
"Qwen/Qwen2.5-Coder-32B-Instruct",
|
186 |
-
"microsoft/Phi-3.5-mini-instruct",
|
187 |
-
"microsoft/Phi-3-mini-128k-instruct",
|
188 |
-
"microsoft/Phi-3-mini-4k-instruct",
|
189 |
-
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
|
190 |
-
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
191 |
-
"HuggingFaceH4/zephyr-7b-beta",
|
192 |
-
"HuggingFaceTB/SmolLM2-360M-Instruct",
|
193 |
-
"tiiuae/falcon-7b-instruct",
|
194 |
-
"01-ai/Yi-1.5-34B-Chat",
|
195 |
-
]
|
196 |
-
print("Models list initialized.")
|
197 |
-
|
198 |
-
featured_model_radio = gr.Radio(
|
199 |
-
label="Select a model below",
|
200 |
-
choices=models_list,
|
201 |
-
value="meta-llama/Llama-3.3-70B-Instruct",
|
202 |
-
interactive=True
|
203 |
-
)
|
204 |
-
print("Featured models radio button created.")
|
205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
def filter_models(search_term):
|
207 |
print(f"Filtering models with search term: {search_term}")
|
|
|
208 |
filtered = [m for m in models_list if search_term.lower() in m.lower()]
|
209 |
print(f"Filtered models: {filtered}")
|
|
|
210 |
return gr.update(choices=filtered)
|
211 |
|
|
|
212 |
model_search_box.change(
|
213 |
-
fn=filter_models,
|
214 |
-
inputs=model_search_box,
|
215 |
-
outputs=featured_model_radio
|
216 |
)
|
217 |
print("Model search box change event linked.")
|
218 |
|
219 |
-
featured_model_radio.change(
|
220 |
-
fn=set_custom_model_from_radio,
|
221 |
-
inputs=featured_model_radio,
|
222 |
-
outputs=custom_model_box
|
223 |
-
)
|
224 |
-
print("Featured model radio button change event linked.")
|
225 |
|
226 |
-
print("Gradio interface
|
227 |
|
|
|
228 |
if __name__ == "__main__":
|
229 |
-
print("Launching the demo application.")
|
|
|
230 |
demo.launch(show_api=True)
|
|
|
2 |
from openai import OpenAI
|
3 |
import os
|
4 |
|
5 |
+
# Load the Hugging Face access token from environment variables
|
6 |
ACCESS_TOKEN = os.getenv("HF_TOKEN")
|
7 |
print("Access token loaded.")
|
8 |
|
9 |
+
# Initialize the OpenAI client pointing to the Hugging Face Inference API
|
10 |
client = OpenAI(
|
11 |
base_url="https://api-inference.huggingface.co/v1/",
|
12 |
api_key=ACCESS_TOKEN,
|
13 |
)
|
14 |
print("OpenAI client initialized.")
|
15 |
|
16 |
+
# Define the main function that handles chat responses
|
17 |
def respond(
|
18 |
message,
|
19 |
history: list[tuple[str, str]],
|
|
|
23 |
top_p,
|
24 |
frequency_penalty,
|
25 |
seed,
|
26 |
+
custom_model, # Input from the Custom Model textbox
|
27 |
+
featured_model # Input from the Featured Model radio buttons <<< NEW INPUT
|
28 |
):
|
29 |
|
30 |
print(f"Received message: {message}")
|
|
|
32 |
print(f"System message: {system_message}")
|
33 |
print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
|
34 |
print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
|
35 |
+
print(f"Custom model input: '{custom_model}'")
|
36 |
+
print(f"Selected featured model: {featured_model}") # Log the featured model selection
|
37 |
|
38 |
# Convert seed to None if -1 (meaning random)
|
39 |
if seed == -1:
|
40 |
seed = None
|
41 |
|
42 |
+
# Start constructing the message list for the API call with the system message
|
43 |
messages = [{"role": "system", "content": system_message}]
|
44 |
print("Initial messages array constructed.")
|
45 |
|
46 |
+
# Add the conversation history to the messages list
|
47 |
for val in history:
|
48 |
user_part = val[0]
|
49 |
assistant_part = val[1]
|
|
|
54 |
messages.append({"role": "assistant", "content": assistant_part})
|
55 |
print(f"Added assistant message to context: {assistant_part}")
|
56 |
|
57 |
+
# Add the latest user message to the list
|
58 |
messages.append({"role": "user", "content": message})
|
59 |
print("Latest user message appended.")
|
60 |
|
61 |
+
# <<< MODEL SELECTION LOGIC UPDATED >>>
|
62 |
+
# Determine the model to use: Prioritize the custom model box if it's filled,
|
63 |
+
# otherwise use the selected featured model.
|
64 |
+
custom_model_stripped = custom_model.strip() # Remove leading/trailing whitespace
|
65 |
+
if custom_model_stripped != "":
|
66 |
+
model_to_use = custom_model_stripped # Use custom model if provided
|
67 |
+
print(f"Using custom model: {model_to_use}")
|
68 |
+
else:
|
69 |
+
model_to_use = featured_model # Use the selected featured model
|
70 |
+
print(f"Using selected featured model: {model_to_use}")
|
71 |
+
|
72 |
|
73 |
+
# Initialize an empty string to accumulate the response tokens
|
74 |
response = ""
|
75 |
+
print("Sending request to Hugging Face Inference API.")
|
76 |
|
77 |
+
# Stream the response from the API
|
78 |
for message_chunk in client.chat.completions.create(
|
79 |
+
model=model_to_use, # Use the determined model
|
80 |
+
max_tokens=max_tokens, # Set maximum tokens for the response
|
81 |
+
stream=True, # Enable streaming responses
|
82 |
+
temperature=temperature, # Set sampling temperature
|
83 |
+
top_p=top_p, # Set nucleus sampling probability
|
84 |
+
frequency_penalty=frequency_penalty, # Set frequency penalty
|
85 |
+
seed=seed, # Set random seed (if provided)
|
86 |
+
messages=messages, # Pass the constructed message history
|
87 |
):
|
88 |
+
# Get the text content from the current chunk
|
89 |
token_text = message_chunk.choices[0].delta.content
|
90 |
+
# Append the token text to the response string (if it's not None)
|
91 |
+
if token_text:
|
92 |
+
print(f"Received token: {token_text}")
|
93 |
+
response += token_text
|
94 |
+
yield response # Yield the partial response back to Gradio for live updates
|
95 |
|
96 |
print("Completed response generation.")
|
97 |
|
98 |
+
# --- GRADIO UI ---
|
99 |
|
100 |
+
# Create the main chatbot display area
|
101 |
chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", layout="panel")
|
102 |
print("Chatbot interface created.")
|
103 |
|
104 |
+
# Create the System Prompt input box
|
105 |
system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
|
106 |
|
107 |
+
# Create sliders for model parameters
|
108 |
+
max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
|
109 |
+
temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
|
110 |
+
top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
|
111 |
+
frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
|
112 |
+
seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
+
# Create the Custom Model input box
|
115 |
custom_model_box = gr.Textbox(
|
116 |
+
value="", # Default to empty
|
117 |
label="Custom Model",
|
118 |
+
info="(Optional) Provide a custom Hugging Face model path. Overrides the featured model selection below.",
|
119 |
+
placeholder="e.g., username/my-custom-model" # Updated placeholder
|
120 |
+
)
|
121 |
+
|
122 |
+
# Define the list of featured models
|
123 |
+
models_list = [
|
124 |
+
"meta-llama/Llama-3.3-70B-Instruct", # Default selected model
|
125 |
+
"meta-llama/Llama-3.1-70B-Instruct",
|
126 |
+
"meta-llama/Llama-3.0-70B-Instruct",
|
127 |
+
"meta-llama/Llama-3.2-3B-Instruct",
|
128 |
+
"meta-llama/Llama-3.2-1B-Instruct",
|
129 |
+
"meta-llama/Llama-3.1-8B-Instruct",
|
130 |
+
"NousResearch/Hermes-3-Llama-3.1-8B",
|
131 |
+
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
|
132 |
+
"mistralai/Mistral-Nemo-Instruct-2407",
|
133 |
+
"mistralai/Mixtral-8x7B-Instruct-v0.1",
|
134 |
+
"mistralai/Mistral-7B-Instruct-v0.3",
|
135 |
+
"mistralai/Mistral-7B-Instruct-v0.2",
|
136 |
+
"Qwen/Qwen3-235B-A22B",
|
137 |
+
"Qwen/Qwen3-32B",
|
138 |
+
"Qwen/Qwen2.5-72B-Instruct",
|
139 |
+
"Qwen/Qwen2.5-3B-Instruct",
|
140 |
+
"Qwen/Qwen2.5-0.5B-Instruct",
|
141 |
+
"Qwen/QwQ-32B",
|
142 |
+
"Qwen/Qwen2.5-Coder-32B-Instruct",
|
143 |
+
"microsoft/Phi-3.5-mini-instruct",
|
144 |
+
"microsoft/Phi-3-mini-128k-instruct",
|
145 |
+
"microsoft/Phi-3-mini-4k-instruct",
|
146 |
+
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
|
147 |
+
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
|
148 |
+
"HuggingFaceH4/zephyr-7b-beta",
|
149 |
+
"HuggingFaceTB/SmolLM2-360M-Instruct",
|
150 |
+
"tiiuae/falcon-7b-instruct",
|
151 |
+
"01-ai/Yi-1.5-34B-Chat",
|
152 |
+
]
|
153 |
+
print("Models list initialized.")
|
154 |
+
|
155 |
+
# Create the radio button selector for featured models
|
156 |
+
featured_model_radio = gr.Radio(
|
157 |
+
label="Select a Featured Model", # Changed label slightly
|
158 |
+
choices=models_list,
|
159 |
+
value="meta-llama/Llama-3.3-70B-Instruct", # Set the default selection
|
160 |
+
interactive=True
|
161 |
)
|
162 |
+
print("Featured models radio button created.")
|
163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
|
165 |
+
# --- Create the main Chat Interface ---
|
166 |
+
# <<< `additional_inputs` UPDATED >>>
|
167 |
demo = gr.ChatInterface(
|
168 |
+
fn=respond, # The function to call when a message is sent
|
169 |
+
additional_inputs=[ # List of input components passed to the 'respond' function
|
170 |
system_message_box,
|
171 |
max_tokens_slider,
|
172 |
temperature_slider,
|
|
|
174 |
frequency_penalty_slider,
|
175 |
seed_slider,
|
176 |
custom_model_box,
|
177 |
+
featured_model_radio # Pass the radio button selection <<< ADDED
|
178 |
],
|
179 |
+
fill_height=True, # Make the interface fill the available height
|
180 |
+
chatbot=chatbot, # Use the predefined chatbot component
|
181 |
+
theme="Nymbo/Nymbo_Theme", # Apply a theme
|
182 |
)
|
183 |
print("ChatInterface object created.")
|
184 |
|
185 |
+
# --- Add Model Selection Controls within the Interface ---
|
186 |
+
with demo:  # Re-enter the ChatInterface's Blocks context so new components and events attach to it
    with gr.Accordion("Model Selection & Parameters", open=False):
        # --- Featured Model Selection ---
        gr.Markdown("### Featured Models")
        model_search_box = gr.Textbox(
            label="Filter Models",
            placeholder="Search featured models...",
            lines=1,
        )
        print("Model search box created.")

        # The previous revision called
        #     demo.load(lambda: <component>, outputs=<component>)
        # for the radio, the custom-model box and every slider, intending to
        # "place" them inside this accordion. `load` only registers a callback
        # that runs when the page loads; it never renders anything. Those
        # components are passed to gr.ChatInterface via `additional_inputs`,
        # which is what puts them on the page, so the calls were redundant
        # page-load events and have been removed.
        # NOTE(review): the section headings below therefore have no controls
        # under them; moving the controls here would require building the UI
        # with gr.Blocks instead of relying on `additional_inputs` — confirm
        # the intended layout.
        print("Featured model radio added to layout.")

        # --- Custom Model Input ---
        gr.Markdown("### Custom Model")
        print("Custom model box added to layout.")

        # --- Parameters ---
        gr.Markdown("### Parameters")
        print("Parameter sliders added to layout.")

    # --- Event Listeners ---

    def filter_models(search_term):
        """Return a Radio update listing the featured models matching *search_term*.

        Matching is a case-insensitive substring test against every entry of
        ``models_list``. An empty, whitespace-only or ``None`` search term
        matches every model, so clearing the search box restores the full list.

        Args:
            search_term: Current text of the "Filter Models" textbox.

        Returns:
            The result of ``gr.update(choices=...)`` carrying the filtered
            choices for the featured-model radio.
        """
        print(f"Filtering models with search term: {search_term}")
        # Normalise once, outside the loop; `or ""` guards against a None value.
        needle = (search_term or "").strip().lower()
        filtered = [m for m in models_list if needle in m.lower()]
        print(f"Filtered models: {filtered}")
        return gr.update(choices=filtered)

    # Re-filter the radio choices every time the search text changes.
    model_search_box.change(
        fn=filter_models,
        inputs=model_search_box,
        outputs=featured_model_radio,
    )
    print("Model search box change event linked.")
|
238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
239 |
|
240 |
+
print("Gradio interface layout defined.")
|
241 |
|
242 |
+
# --- Launch the Application ---
|
243 |
if __name__ == "__main__":
|
244 |
+
print("Launching the Gradio demo application.")
|
245 |
+
# Launch the Gradio app with API endpoint enabled
|
246 |
demo.launch(show_api=True)
|