Bhaskar2611 committed
Commit e35136f · verified
1 Parent(s): 399d5a5

Update app.py

Files changed (1):
  app.py (+162 / -45)
app.py CHANGED
@@ -140,6 +140,118 @@
 # if __name__ == "__main__":
 #     demo.launch()
 
+# import gradio as gr
+# from huggingface_hub import InferenceClient
+# import tempfile
+
+# # Initialize clients
+# chat_client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")
+# stt_client = InferenceClient("openai/whisper-large-v3")
+
+# def transcribe_audio(audio_file):
+#     """Convert audio to text using Whisper model"""
+#     with open(audio_file, "rb") as f:
+#         result = stt_client.automatic_speech_recognition(f.read())
+#         return result.text  # Extract only the text from the response
+
+# def respond(history, query):
+#     system_message = """You are a friendly Product Assistant. Follow these rules:
+#     1. If the query is product-related, provide structured recommendations
+#     2. Consider both voice and text inputs equally
+#     3. Format responses with bullet points and emojis
+#     4. Always acknowledge voice queries specifically"""
+
+#     product_prompt = f"""Analyze this {'voice' if history[-1][0] == query else 'text'} query:
+#     "{query}"
+#     Recommend products considering:
+#     - User intent
+#     - Semantic meaning
+#     - Potential use cases
+#     - Price ranges
+#     Provide ranked list with brief explanations"""
+
+#     messages = [
+#         {"role": "system", "content": system_message},
+#         {"role": "user", "content": product_prompt}
+#     ]
+
+#     # Build conversation history
+#     for entry in history[:-1]:  # Exclude current query
+#         messages.extend([
+#             {"role": "user", "content": entry[0]},
+#             {"role": "assistant", "content": entry[1]}
+#         ])
+
+#     # Generate streamed response
+#     response = ""
+#     for chunk in chat_client.chat_completion(
+#         messages,
+#         max_tokens=2048,
+#         stream=True,
+#         temperature=0.7,
+#         top_p=0.95,
+#     ):
+#         token = chunk.choices[0].delta.content
+#         response += token
+#         history[-1] = (history[-1][0], response)  # Update last entry
+#         yield history
+
+# # Custom styling
+# css = """
+# .gradio-container { background: #f5f7fa !important; }
+# .audio-input { background: white !important; border-radius: 10px; }
+# """
+
+# with gr.Blocks(css=css) as demo:
+#     gr.Markdown("# 🎤 Voice-Activated Product Advisor 🛍️")
+
+#     with gr.Row():
+#         chatbot = gr.Chatbot(height=600, bubble_full_width=False)
+
+#         with gr.Column():
+#             with gr.Tab("🎙️ Voice Input"):
+#                 audio_input = gr.Audio(
+#                     sources="microphone",
+#                     type="filepath",
+#                     label="Speak your product request",
+#                     elem_classes="audio-input"
+#                 )
+#             with gr.Tab("📝 Text Input"):
+#                 text_input = gr.Textbox(label="Type your request")
+#             submit_btn = gr.Button("🚀 Get Recommendations", variant="primary")
+
+#     def process_inputs(text, audio, history):
+#         """Handle both input types"""
+#         query = text.strip()
+#         if audio:
+#             query = transcribe_audio(audio)
+#             # Add voice-specific marker
+#             query = f"🎤 Voice Query: {query}"
+
+#         if query:
+#             return history + [(query, None)]  # Proper tuple format
+#         return history
+
+#     submit_btn.click(
+#         process_inputs,
+#         [text_input, audio_input, chatbot],
+#         chatbot,
+#         queue=False
+#     ).then(
+#         respond,
+#         [chatbot, text_input],
+#         chatbot
+#     )
+
+#     # Clear inputs after submission
+#     submit_btn.click(
+#         lambda: [None, None],  # Clear audio and text inputs
+#         outputs=[text_input, audio_input]
+#     )
+
+# if __name__ == "__main__":
+#     demo.launch()
+
 import gradio as gr
 from huggingface_hub import InferenceClient
 import tempfile
@@ -150,38 +262,33 @@ stt_client = InferenceClient("openai/whisper-large-v3")
 
 def transcribe_audio(audio_file):
     """Convert audio to text using Whisper model"""
-    with open(audio_file, "rb") as f:
-        result = stt_client.automatic_speech_recognition(f.read())
-        return result.text  # Extract only the text from the response
+    try:
+        with open(audio_file, "rb") as f:
+            result = stt_client.automatic_speech_recognition(f.read())
+            return result.text
+    except Exception as e:
+        print(f"Transcription error: {e}")
+        return "Could not process audio. Please try again."
 
 def respond(history, query):
-    system_message = """You are a friendly Product Assistant. Follow these rules:
-    1. If the query is product-related, provide structured recommendations
-    2. Consider both voice and text inputs equally
-    3. Format responses with bullet points and emojis
-    4. Always acknowledge voice queries specifically"""
+    system_message = """You are a Voice-Aware Product Assistant. Rules:
+    1. Always acknowledge voice queries with 🎧 icon
+    2. Provide structured recommendations with emojis
+    3. Consider both voice and text inputs equally"""
 
-    product_prompt = f"""Analyze this {'voice' if history[-1][0] == query else 'text'} query:
-    "{query}"
+    product_prompt = f"""Analyze this {'🎧 VOICE' if '🎧' in query else '📝 TEXT'} query:
+    "{query.replace('🎧 VOICE: ', '')}"
     Recommend products considering:
-    - User intent
+    - Voice tone analysis (if audio)
     - Semantic meaning
-    - Potential use cases
-    - Price ranges
-    Provide ranked list with brief explanations"""
+    - User intent
+    - Price context"""
 
     messages = [
         {"role": "system", "content": system_message},
        {"role": "user", "content": product_prompt}
     ]
 
-    # Build conversation history
-    for entry in history[:-1]:  # Exclude current query
-        messages.extend([
-            {"role": "user", "content": entry[0]},
-            {"role": "assistant", "content": entry[1]}
-        ])
-
     # Generate streamed response
     response = ""
     for chunk in chat_client.chat_completion(
@@ -193,49 +300,65 @@ def respond(history, query):
     ):
         token = chunk.choices[0].delta.content
         response += token
-        history[-1] = (history[-1][0], response)  # Update last entry
+        history[-1] = (history[-1][0], response)
         yield history
 
-# Custom styling
 css = """
-.gradio-container { background: #f5f7fa !important; }
+.gradio-container { background: #f8f9fa !important; }
 .audio-input { background: white !important; border-radius: 10px; }
+.mic-status { color: #4a90e2; font-weight: bold; }
 """
 
-with gr.Blocks(css=css) as demo:
-    gr.Markdown("# 🎤 Voice-Activated Product Advisor 🛍️")
+with gr.Blocks(css=css, title="Voice Product Assistant") as demo:
+    gr.Markdown("# 🎧 Voice-Activated Product Advisor 🛒")
 
     with gr.Row():
         chatbot = gr.Chatbot(height=600, bubble_full_width=False)
 
         with gr.Column():
-            with gr.Tab("🎙️ Voice Input"):
+            # Audio input with status indicator
+            with gr.Group():
                 audio_input = gr.Audio(
                     sources="microphone",
                     type="filepath",
-                    label="Speak your product request",
-                    elem_classes="audio-input"
+                    label="Click mic & speak",
+                    elem_classes="audio-input",
+                    interactive=True
                 )
-            with gr.Tab("📝 Text Input"):
-                text_input = gr.Textbox(label="Type your request")
+                mic_status = gr.Markdown("🔴 Mic offline", elem_classes="mic-status")
+
+            # Text input
+            text_input = gr.Textbox(label="Or type your request")
+
+            # Unified submit button
             submit_btn = gr.Button("🚀 Get Recommendations", variant="primary")
 
+    # Handle audio permissions
+    def request_mic_access():
+        return gr.update(text="🟢 Mic ready") if audio_input.is_enabled else gr.update(text="🔴 Mic blocked")
+
+    # Process inputs
     def process_inputs(text, audio, history):
-        """Handle both input types"""
         query = text.strip()
         if audio:
-            query = transcribe_audio(audio)
-            # Add voice-specific marker
-            query = f"🎤 Voice Query: {query}"
+            transcript = transcribe_audio(audio)
+            query = f"🎧 VOICE: {transcript}"
 
         if query:
-            return history + [(query, None)]  # Proper tuple format
-        return history
+            return history + [(query, None)], ""
+        return history, ""
 
+    # Connect all components
+    audio_input.change(
+        request_mic_access,
+        outputs=mic_status,
+        queue=False
+    )
+
     submit_btn.click(
         process_inputs,
         [text_input, audio_input, chatbot],
-        chatbot,
+        [chatbot, text_input],
         queue=False
     ).then(
         respond,
@@ -243,11 +366,5 @@ with gr.Blocks(css=css) as demo:
         chatbot
     )
 
-    # Clear inputs after submission
-    submit_btn.click(
-        lambda: [None, None],  # Clear audio and text inputs
-        outputs=[text_input, audio_input]
-    )
-
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(server_port=7860, share=False)
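
A note on the streamed-response loop in the updated respond(): with huggingface_hub's chat_completion(..., stream=True), chunk.choices[0].delta.content may be None on some chunks (for example a final chunk that only carries a finish reason), in which case response += token raises a TypeError. A minimal defensive variant, sketched as a standalone helper (stream_reply is a hypothetical name, not part of this commit) and assuming the same chat_client, messages, and history structure as the commit:

from huggingface_hub import InferenceClient

chat_client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct")

def stream_reply(messages, history):
    # Same streaming parameters as the commit, but skip chunks whose delta has no text.
    response = ""
    for chunk in chat_client.chat_completion(
        messages,
        max_tokens=2048,
        stream=True,
        temperature=0.7,
        top_p=0.95,
    ):
        token = chunk.choices[0].delta.content
        if token:  # delta.content can be None on role-only or finish-reason chunks
            response += token
            history[-1] = (history[-1][0], response)
            yield history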
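
Separately, the new request_mic_access callback reads audio_input.is_enabled and returns gr.update(text=...); gr.Audio does not expose an is_enabled attribute, and the content property of gr.Markdown is value rather than text, so the status label may never update. A possible alternative, sketched under the assumption that receiving a recording filepath from the change event is an acceptable proxy for the microphone working (update_mic_status is a hypothetical name, not part of this commit):

import gradio as gr

def update_mic_status(audio_path):
    # The Audio component's change event passes the recorded filepath, or None if cleared.
    return gr.update(value="🟢 Mic ready" if audio_path else "🔴 Mic offline")

# Wiring inside the Blocks context, reusing the commit's audio_input and mic_status components:
# audio_input.change(update_mic_status, inputs=audio_input, outputs=mic_status, queue=False)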