sbapan41 committed on
Commit
f1b1f12
·
verified ·
1 Parent(s): 5f3de40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -7
app.py CHANGED
@@ -1,18 +1,27 @@
1
- import gradio as gr
2
  from transformers import pipeline
 
3
  import os
 
 
4
 
5
  # Model ID from Hugging Face
6
- model_id = "sbapan41/Quantum_STT"
7
 
8
- # Load the speech recognition pipeline
9
  pipe = pipeline(
10
  "automatic-speech-recognition",
11
  model=model_id,
12
  generate_kwargs={"language": "en", "task": "transcribe"},
13
- tokenizer=model_id
 
14
  )
15
 
 
 
 
 
 
 
16
  # Transcription function with format check
17
  def transcribe(audio):
18
  if audio is None:
@@ -23,8 +32,17 @@ def transcribe(audio):
23
  if ext not in [".caf", ".au", ".opus", ".amr", ".alac", ".aiff", ".wma", ".m4a", ".ogg", ".aac", ".flac", ".wav", ".mp3"]:
24
  return f"❌ Unsupported file format: {ext}. Please upload .caf, .au, .opus, .amr, .alac, .aiff, .wma, .m4a, .ogg, .aac, .flac, .wav or .mp3 files."
25
 
26
- result = pipe(audio)
27
- return result["text"]
 
 
 
 
 
 
 
 
 
28
 
29
  # Gradio interface
30
  interface = gr.Interface(
@@ -40,4 +58,4 @@ interface = gr.Interface(
40
  )
41
 
42
  # Launch the interface
43
- interface.launch()
 
 
1
  from transformers import pipeline
2
+ import gradio as gr
3
  import os
4
+ from pydub import AudioSegment
5
+ import tempfile
6
 
7
  # Model ID from Hugging Face
8
+ model_id = "Quantamhash/Quantum_STT"
9
 
10
+ # Load the speech recognition pipeline with CPU support
11
  pipe = pipeline(
12
  "automatic-speech-recognition",
13
  model=model_id,
14
  generate_kwargs={"language": "en", "task": "transcribe"},
15
+ tokenizer=model_id,
16
+ device=-1 # Use CPU
17
  )
18
 
19
def convert_to_wav(input_path):
    """Convert the audio file at ``input_path`` to WAV and return the new path.

    The source file is decoded with ``AudioSegment.from_file`` and written
    to a freshly created temporary ``.wav`` file.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        Path of the temporary ``.wav`` file. The file is not deleted
        automatically; the caller is responsible for removing it.

    Raises:
        Exception: Whatever ``AudioSegment.from_file`` or ``export`` raises
            is propagated; a partially written temp file is removed first.
    """
    audio = AudioSegment.from_file(input_path)

    # mkstemp hands back an open descriptor; close it before exporting so the
    # path is not held open twice (re-opening an open temp file by name fails
    # on some platforms, notably Windows).
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    try:
        # export() returns the output file handle; close it so the data is
        # flushed and no descriptor is leaked.
        audio.export(wav_path, format="wav").close()
    except Exception:
        # Do not leave an orphaned temp file behind when conversion fails.
        os.remove(wav_path)
        raise
    return wav_path
24
+
25
  # Transcription function with format check
26
  def transcribe(audio):
27
  if audio is None:
 
32
  if ext not in [".caf", ".au", ".opus", ".amr", ".alac", ".aiff", ".wma", ".m4a", ".ogg", ".aac", ".flac", ".wav", ".mp3"]:
33
  return f"❌ Unsupported file format: {ext}. Please upload .caf, .au, .opus, .amr, .alac, .aiff, .wma, .m4a, .ogg, .aac, .flac, .wav or .mp3 files."
34
 
35
+ # Convert to .wav if necessary
36
+ if ext != ".wav":
37
+ audio = convert_to_wav(audio)
38
+
39
+ try:
40
+ result = pipe(audio)
41
+ return result["text"]
42
+ except ValueError as e:
43
+ return f"Error processing audio file: {str(e)}"
44
+ except Exception as e:
45
+ return f"An unexpected error occurred: {str(e)}"
46
 
47
  # Gradio interface
48
  interface = gr.Interface(
 
58
  )
59
 
60
  # Launch the interface
61
+ interface.launch()