Nymbo committed (verified)
Commit 4df41b9 · 1 Parent(s): 381d9f9

Update app.py

Files changed (1)
  1. app.py +142 -126
app.py CHANGED
@@ -2,16 +2,18 @@ import gradio as gr
 from openai import OpenAI
 import os
 
+# Load the Hugging Face access token from environment variables
 ACCESS_TOKEN = os.getenv("HF_TOKEN")
 print("Access token loaded.")
 
+# Initialize the OpenAI client pointing to the Hugging Face Inference API
 client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1/",
     api_key=ACCESS_TOKEN,
 )
 print("OpenAI client initialized.")
 
-
+# Define the main function that handles chat responses
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -21,7 +23,8 @@ def respond(
     top_p,
     frequency_penalty,
     seed,
-    custom_model
+    custom_model,    # Input from the Custom Model textbox
+    featured_model   # Input from the Featured Model radio buttons <<< NEW INPUT
 ):
 
     print(f"Received message: {message}")
@@ -29,16 +32,18 @@ def respond(
     print(f"System message: {system_message}")
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
-    print(f"Selected model (custom_model): {custom_model}")
+    print(f"Custom model input: '{custom_model}'")
+    print(f"Selected featured model: {featured_model}")  # Log the featured model selection
 
     # Convert seed to None if -1 (meaning random)
     if seed == -1:
         seed = None
 
+    # Start constructing the message list for the API call with the system message
     messages = [{"role": "system", "content": system_message}]
     print("Initial messages array constructed.")
 
-    # Add conversation history to the context
+    # Add the conversation history to the messages list
     for val in history:
         user_part = val[0]
         assistant_part = val[1]
@@ -49,97 +54,119 @@ def respond(
             messages.append({"role": "assistant", "content": assistant_part})
             print(f"Added assistant message to context: {assistant_part}")
 
-    # Append the latest user message
+    # Add the latest user message to the list
     messages.append({"role": "user", "content": message})
     print("Latest user message appended.")
 
-    # If user provided a model, use that; otherwise, fall back to a default model
-    model_to_use = custom_model.strip() if custom_model.strip() != "" else "meta-llama/Llama-3.3-70B-Instruct"
-    print(f"Model selected for inference: {model_to_use}")
+    # <<< MODEL SELECTION LOGIC UPDATED >>>
+    # Determine the model to use: prioritize the custom model box if it's filled,
+    # otherwise use the selected featured model.
+    custom_model_stripped = custom_model.strip()  # Remove leading/trailing whitespace
+    if custom_model_stripped != "":
+        model_to_use = custom_model_stripped  # Use custom model if provided
+        print(f"Using custom model: {model_to_use}")
+    else:
+        model_to_use = featured_model  # Use the selected featured model
+        print(f"Using selected featured model: {model_to_use}")
+
 
-    # Start with an empty string to build the response as tokens stream in
+    # Initialize an empty string to accumulate the response tokens
     response = ""
-    print("Sending request to OpenAI API.")
+    print("Sending request to Hugging Face Inference API.")
 
+    # Stream the response from the API
     for message_chunk in client.chat.completions.create(
-        model=model_to_use,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-        frequency_penalty=frequency_penalty,
-        seed=seed,
-        messages=messages,
+        model=model_to_use,                   # Use the determined model
+        max_tokens=max_tokens,                # Set maximum tokens for the response
+        stream=True,                          # Enable streaming responses
+        temperature=temperature,              # Set sampling temperature
+        top_p=top_p,                          # Set nucleus sampling probability
+        frequency_penalty=frequency_penalty,  # Set frequency penalty
+        seed=seed,                            # Set random seed (if provided)
+        messages=messages,                    # Pass the constructed message history
     ):
+        # Get the text content from the current chunk
         token_text = message_chunk.choices[0].delta.content
-        print(f"Received token: {token_text}")
-        response += token_text
-        yield response
+        # Append the token text to the response string (if it's not None)
+        if token_text:
+            print(f"Received token: {token_text}")
+            response += token_text
+        yield response  # Yield the partial response back to Gradio for live updates
 
     print("Completed response generation.")
 
-# GRADIO UI
+# --- GRADIO UI ---
 
+# Create the main chatbot display area
 chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="Select a model and begin chatting", layout="panel")
 print("Chatbot interface created.")
 
+# Create the System Prompt input box
 system_message_box = gr.Textbox(value="", placeholder="You are a helpful assistant.", label="System Prompt")
 
-max_tokens_slider = gr.Slider(
-    minimum=1,
-    maximum=4096,
-    value=512,
-    step=1,
-    label="Max new tokens"
-)
-temperature_slider = gr.Slider(
-    minimum=0.1,
-    maximum=4.0,
-    value=0.7,
-    step=0.1,
-    label="Temperature"
-)
-top_p_slider = gr.Slider(
-    minimum=0.1,
-    maximum=1.0,
-    value=0.95,
-    step=0.05,
-    label="Top-P"
-)
-frequency_penalty_slider = gr.Slider(
-    minimum=-2.0,
-    maximum=2.0,
-    value=0.0,
-    step=0.1,
-    label="Frequency Penalty"
-)
-seed_slider = gr.Slider(
-    minimum=-1,
-    maximum=65535,
-    value=-1,
-    step=1,
-    label="Seed (-1 for random)"
-)
+# Create sliders for model parameters
+max_tokens_slider = gr.Slider(minimum=1, maximum=4096, value=512, step=1, label="Max new tokens")
+temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P")
+frequency_penalty_slider = gr.Slider(minimum=-2.0, maximum=2.0, value=0.0, step=0.1, label="Frequency Penalty")
+seed_slider = gr.Slider(minimum=-1, maximum=65535, value=-1, step=1, label="Seed (-1 for random)")
 
-# The custom_model_box is what the respond function sees as "custom_model"
+# Create the Custom Model input box
 custom_model_box = gr.Textbox(
-    value="",
+    value="",  # Default to empty
     label="Custom Model",
-    info="(Optional) Provide a custom Hugging Face model path. Overrides any selected featured model.",
-    placeholder="meta-llama/Llama-3.3-70B-Instruct"
+    info="(Optional) Provide a custom Hugging Face model path. Overrides the featured model selection below.",
+    placeholder="e.g., username/my-custom-model"  # Updated placeholder
+)
+
+# Define the list of featured models
+models_list = [
+    "meta-llama/Llama-3.3-70B-Instruct",  # Default selected model
+    "meta-llama/Llama-3.1-70B-Instruct",
+    "meta-llama/Llama-3.0-70B-Instruct",
+    "meta-llama/Llama-3.2-3B-Instruct",
+    "meta-llama/Llama-3.2-1B-Instruct",
+    "meta-llama/Llama-3.1-8B-Instruct",
+    "NousResearch/Hermes-3-Llama-3.1-8B",
+    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+    "mistralai/Mistral-Nemo-Instruct-2407",
+    "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "mistralai/Mistral-7B-Instruct-v0.3",
+    "mistralai/Mistral-7B-Instruct-v0.2",
+    "Qwen/Qwen3-235B-A22B",
+    "Qwen/Qwen3-32B",
+    "Qwen/Qwen2.5-72B-Instruct",
+    "Qwen/Qwen2.5-3B-Instruct",
+    "Qwen/Qwen2.5-0.5B-Instruct",
+    "Qwen/QwQ-32B",
+    "Qwen/Qwen2.5-Coder-32B-Instruct",
+    "microsoft/Phi-3.5-mini-instruct",
+    "microsoft/Phi-3-mini-128k-instruct",
+    "microsoft/Phi-3-mini-4k-instruct",
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+    "HuggingFaceH4/zephyr-7b-beta",
+    "HuggingFaceTB/SmolLM2-360M-Instruct",
+    "tiiuae/falcon-7b-instruct",
+    "01-ai/Yi-1.5-34B-Chat",
+]
+print("Models list initialized.")
+
+# Create the radio button selector for featured models
+featured_model_radio = gr.Radio(
+    label="Select a Featured Model",  # Changed label slightly
+    choices=models_list,
+    value="meta-llama/Llama-3.3-70B-Instruct",  # Set the default selection
+    interactive=True
 )
+print("Featured models radio button created.")
 
-def set_custom_model_from_radio(selected):
-    """
-    This function will get triggered whenever someone picks a model from the 'Featured Models' radio.
-    We will update the Custom Model text box with that selection automatically.
-    """
-    print(f"Featured model selected: {selected}")
-    return selected
 
+# --- Create the main Chat Interface ---
+# <<< `additional_inputs` UPDATED >>>
 demo = gr.ChatInterface(
-    fn=respond,
-    additional_inputs=[
+    fn=respond,          # The function to call when a message is sent
+    additional_inputs=[  # List of input components passed to the 'respond' function
         system_message_box,
         max_tokens_slider,
         temperature_slider,
@@ -147,84 +174,73 @@ demo = gr.ChatInterface(
         frequency_penalty_slider,
         seed_slider,
         custom_model_box,
+        featured_model_radio  # Pass the radio button selection <<< ADDED
     ],
-    fill_height=True,
-    chatbot=chatbot,
-    theme="Nymbo/Nymbo_Theme",
+    fill_height=True,           # Make the interface fill the available height
+    chatbot=chatbot,            # Use the predefined chatbot component
+    theme="Nymbo/Nymbo_Theme",  # Apply a theme
 )
 print("ChatInterface object created.")
 
-with demo:
-    with gr.Accordion("Model Selection", open=False):
+# --- Add Model Selection Controls within the Interface ---
+with demo:  # Use the ChatInterface as a context manager to add elements
+    with gr.Accordion("Model Selection & Parameters", open=False):  # Group controls in an accordion
+        # --- Featured Model Selection ---
+        gr.Markdown("### Featured Models")  # Section title
         model_search_box = gr.Textbox(
             label="Filter Models",
-            placeholder="Search for a featured model...",
+            placeholder="Search featured models...",
            lines=1
         )
         print("Model search box created.")
 
-        models_list = [
-            "meta-llama/Llama-3.3-70B-Instruct",
-            "meta-llama/Llama-3.1-70B-Instruct",
-            "meta-llama/Llama-3.0-70B-Instruct",
-            "meta-llama/Llama-3.2-3B-Instruct",
-            "meta-llama/Llama-3.2-1B-Instruct",
-            "meta-llama/Llama-3.1-8B-Instruct",
-            "NousResearch/Hermes-3-Llama-3.1-8B",
-            "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-            "mistralai/Mistral-Nemo-Instruct-2407",
-            "mistralai/Mixtral-8x7B-Instruct-v0.1",
-            "mistralai/Mistral-7B-Instruct-v0.3",
-            "mistralai/Mistral-7B-Instruct-v0.2",
-            "Qwen/Qwen3-235B-A22B",
-            "Qwen/Qwen3-32B",
-            "Qwen/Qwen2.5-72B-Instruct",
-            "Qwen/Qwen2.5-3B-Instruct",
-            "Qwen/Qwen2.5-0.5B-Instruct",
-            "Qwen/QwQ-32B",
-            "Qwen/Qwen2.5-Coder-32B-Instruct",
-            "microsoft/Phi-3.5-mini-instruct",
-            "microsoft/Phi-3-mini-128k-instruct",
-            "microsoft/Phi-3-mini-4k-instruct",
-            "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
-            "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-            "HuggingFaceH4/zephyr-7b-beta",
-            "HuggingFaceTB/SmolLM2-360M-Instruct",
-            "tiiuae/falcon-7b-instruct",
-            "01-ai/Yi-1.5-34B-Chat",
-        ]
-        print("Models list initialized.")
-
-        featured_model_radio = gr.Radio(
-            label="Select a model below",
-            choices=models_list,
-            value="meta-llama/Llama-3.3-70B-Instruct",
-            interactive=True
-        )
-        print("Featured models radio button created.")
+        # Place the radio buttons here
+        # No need to define `featured_model_radio` again, just use the variable defined above
+        demo.load(lambda: featured_model_radio, outputs=featured_model_radio)  # Ensure it appears in the layout
+        print("Featured model radio added to layout.")
+
+
+        # --- Custom Model Input ---
+        gr.Markdown("### Custom Model")  # Section title
+        # No need to define `custom_model_box` again, just use the variable defined above
+        demo.load(lambda: custom_model_box, outputs=custom_model_box)  # Ensure it appears in the layout
+        print("Custom model box added to layout.")
 
+        # --- Parameters ---
+        gr.Markdown("### Parameters")  # Section title
+        # Add sliders to the layout
+        demo.load(lambda: max_tokens_slider, outputs=max_tokens_slider)
+        demo.load(lambda: temperature_slider, outputs=temperature_slider)
+        demo.load(lambda: top_p_slider, outputs=top_p_slider)
+        demo.load(lambda: frequency_penalty_slider, outputs=frequency_penalty_slider)
+        demo.load(lambda: seed_slider, outputs=seed_slider)
+        print("Parameter sliders added to layout.")
+
+
+        # --- Event Listeners ---
+
+        # Function to filter the radio button choices based on search input
         def filter_models(search_term):
             print(f"Filtering models with search term: {search_term}")
+            # List comprehension to find models matching the search term (case-insensitive)
            filtered = [m for m in models_list if search_term.lower() in m.lower()]
            print(f"Filtered models: {filtered}")
+            # Update the 'choices' property of the radio button component
            return gr.update(choices=filtered)
 
+        # Link the search box's 'change' event to the filter function
         model_search_box.change(
-            fn=filter_models,
-            inputs=model_search_box,
-            outputs=featured_model_radio
+            fn=filter_models,             # Function to call
+            inputs=model_search_box,      # Input component triggering the event
+            outputs=featured_model_radio  # Output component to update
        )
        print("Model search box change event linked.")
 
-        featured_model_radio.change(
-            fn=set_custom_model_from_radio,
-            inputs=featured_model_radio,
-            outputs=custom_model_box
-        )
-        print("Featured model radio button change event linked.")
 
-        print("Gradio interface initialized.")
+print("Gradio interface layout defined.")
 
+# --- Launch the Application ---
 if __name__ == "__main__":
-    print("Launching the demo application.")
+    print("Launching the Gradio demo application.")
+    # Launch the Gradio app with API endpoint enabled
     demo.launch(show_api=True)
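
The heart of this commit is the new model-selection rule in respond: a non-empty Custom Model entry wins, otherwise the Featured Models radio choice is used (the old code fell back to a hard-coded default instead). Isolated as a standalone sketch, pick_model is a hypothetical helper mirroring the commit's logic, not part of the committed code:

def pick_model(custom_model: str, featured_model: str) -> str:
    # Hypothetical helper mirroring the commit's priority rule:
    # a custom entry that is non-empty after stripping overrides the radio choice.
    stripped = custom_model.strip()
    return stripped if stripped else featured_model

# Whitespace-only custom input falls back to the featured model:
assert pick_model("   ", "meta-llama/Llama-3.3-70B-Instruct") == "meta-llama/Llama-3.3-70B-Instruct"
# Any real custom path takes precedence:
assert pick_model("username/my-custom-model", "Qwen/Qwen3-32B") == "username/my-custom-model"

Note that gr.ChatInterface passes additional_inputs to respond positionally after message and history, which is why featured_model_radio is appended last in the list, lining up with the new featured_model parameter at the end of the signature.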
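A note on the layout additions: the new code surfaces the pre-built radio, textbox, and sliders via demo.load(lambda: component, outputs=component). In Gradio, Blocks.load registers a page-load event rather than placing a component, so whether this pattern actually renders the controls inside the accordion may depend on the Gradio version. The usual way to place components that were constructed ahead of time is component.render() inside the layout context; a minimal sketch under that assumption (names reused from the commit, but this is not the committed code):

import gradio as gr

# Pre-built components, created outside any layout (as in the commit)
featured_model_radio = gr.Radio(
    label="Select a Featured Model",
    choices=["meta-llama/Llama-3.3-70B-Instruct", "Qwen/Qwen3-32B"],
    value="meta-llama/Llama-3.3-70B-Instruct",
)
custom_model_box = gr.Textbox(label="Custom Model", placeholder="e.g., username/my-custom-model")

with gr.Blocks() as demo:
    with gr.Accordion("Model Selection & Parameters", open=False):
        gr.Markdown("### Featured Models")
        featured_model_radio.render()  # place the existing component at this point in the layout
        gr.Markdown("### Custom Model")
        custom_model_box.render()      # likewise for the textbox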