camanalo1 committed on
Commit
311f586
·
verified ·
1 Parent(s): 285309e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -18,9 +18,11 @@ def transcribe_and_generate_audio(audio):
18
  try:
19
  # Transcribe audio
20
  asr_output = transcriber(audio)["text"]
 
21
 
22
  # Generate text based on ASR output
23
  generated_text = generator(asr_output)[0]['generated_text']
 
24
 
25
  # Generate audio from text using TTS model
26
  inputs = tokenizer_tts(text=generated_text, return_tensors="pt")
@@ -31,7 +33,7 @@ def transcribe_and_generate_audio(audio):
31
  waveform_path = "output.wav"
32
  sf.write(waveform_path, waveform.numpy(), 16000, format='wav')
33
 
34
- return waveform_path
35
  except Exception as e:
36
  return f"Error: {str(e)}"
37
 
@@ -39,7 +41,7 @@ def transcribe_and_generate_audio(audio):
39
  audio_input = gr.Interface(
40
  transcribe_and_generate_audio,
41
  gr.Audio(sources=["microphone"], label="Speak Here"),
42
- "audio",
43
  title="ASR -> LLM -> TTS",
44
  description="Speak into the microphone and hear the generated audio."
45
  )
 
18
  try:
19
  # Transcribe audio
20
  asr_output = transcriber(audio)["text"]
21
+ print("ASR Output:", asr_output)
22
 
23
  # Generate text based on ASR output
24
  generated_text = generator(asr_output)[0]['generated_text']
25
+ print("Generated Text:", generated_text)
26
 
27
  # Generate audio from text using TTS model
28
  inputs = tokenizer_tts(text=generated_text, return_tensors="pt")
 
33
  waveform_path = "output.wav"
34
  sf.write(waveform_path, waveform.numpy(), 16000, format='wav')
35
 
36
+ return waveform_path, asr_output, generated_text
37
  except Exception as e:
38
  return f"Error: {str(e)}"
39
 
 
41
  audio_input = gr.Interface(
42
  transcribe_and_generate_audio,
43
  gr.Audio(sources=["microphone"], label="Speak Here"),
44
+ ["audio", "text", "text"],
45
  title="ASR -> LLM -> TTS",
46
  description="Speak into the microphone and hear the generated audio."
47
  )