Spaces:

Quantamhash
/

Quantum_STT-V1

Running on Zero

sbapan41 committed 10 days ago

Commit

f1b1f12

verified ·

1 Parent(s): 5f3de40

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,18 +1,27 @@
-import gradio as gr
 from transformers import pipeline
 import os
 # Model ID from Hugging Face
-model_id = "sbapan41/Quantum_STT"
-# Load the speech recognition pipeline
 pipe = pipeline(
     "automatic-speech-recognition",
     model=model_id,
     generate_kwargs={"language": "en", "task": "transcribe"},
-    tokenizer=model_id
 )
 # Transcription function with format check
 def transcribe(audio):
     if audio is None:
@@ -23,8 +32,17 @@ def transcribe(audio):
     if ext not in [".caf", ".au", ".opus", ".amr", ".alac", ".aiff", ".wma", ".m4a", ".ogg", ".aac", ".flac", ".wav", ".mp3"]:
         return f"❌ Unsupported file format: {ext}. Please upload .caf, .au, .opus, .amr, .alac, .aiff, .wma, .m4a, .ogg, .aac, .flac, .wav or .mp3 files."
-    result = pipe(audio)
-    return result["text"]
 # Gradio interface
 interface = gr.Interface(
@@ -40,4 +58,4 @@ interface = gr.Interface(
 )
 # Launch the interface
-interface.launch()

 from transformers import pipeline
+import gradio as gr
 import os
+from pydub import AudioSegment
+import tempfile
 # Model ID from Hugging Face
+model_id = "Quantamhash/Quantum_STT"
+# Load the speech recognition pipeline with CPU support
 pipe = pipeline(  # module-level ASR pipeline object; built once at import time
     "automatic-speech-recognition",
     model=model_id,
     generate_kwargs={"language": "en", "task": "transcribe"},  # forwarded as generation options; presumably pins English transcription — confirm against the model
+    tokenizer=model_id,  # tokenizer is taken from the same model ID as the model
+    device=-1  # Use CPU
 )
+def convert_to_wav(input_path):  # Re-encode the audio file at input_path as WAV and return the new file's path.
+    audio = AudioSegment.from_file(input_path)  # load the source audio from input_path via pydub's AudioSegment
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:  # delete=False: the file is kept after this block closes it
+        audio.export(temp_wav.name, format="wav")  # write the audio to the temp file's path in WAV format
+        return temp_wav.name  # NOTE(review): nothing in this function removes the file; cleanup appears to be left to the caller — confirm
 # Transcription function with format check
 def transcribe(audio):
     if audio is None:
     if ext not in [".caf", ".au", ".opus", ".amr", ".alac", ".aiff", ".wma", ".m4a", ".ogg", ".aac", ".flac", ".wav", ".mp3"]:
         return f"❌ Unsupported file format: {ext}. Please upload .caf, .au, .opus, .amr, .alac, .aiff, .wma, .m4a, .ogg, .aac, .flac, .wav or .mp3 files."
+    # Convert to .wav if necessary
+    if ext != ".wav":
+        audio = convert_to_wav(audio)
+    try:
+        result = pipe(audio)
+        return result["text"]
+    except ValueError as e:
+        return f"Error processing audio file: {str(e)}"
+    except Exception as e:
+        return f"An unexpected error occurred: {str(e)}"
 # Gradio interface
 interface = gr.Interface(
 )
 # Launch the interface
+interface.launch()