Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,18 +1,27 @@
|
|
1 |
-
import gradio as gr
|
2 |
from transformers import pipeline
|
|
|
3 |
import os
|
|
|
|
|
4 |
|
5 |
# Model ID from Hugging Face
|
6 |
-
model_id = "
|
7 |
|
8 |
-
# Load the speech recognition pipeline
|
9 |
pipe = pipeline(
|
10 |
"automatic-speech-recognition",
|
11 |
model=model_id,
|
12 |
generate_kwargs={"language": "en", "task": "transcribe"},
|
13 |
-
tokenizer=model_id
|
|
|
14 |
)
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
# Transcription function with format check
|
17 |
def transcribe(audio):
|
18 |
if audio is None:
|
@@ -23,8 +32,17 @@ def transcribe(audio):
|
|
23 |
if ext not in [".caf", ".au", ".opus", ".amr", ".alac", ".aiff", ".wma", ".m4a", ".ogg", ".aac", ".flac", ".wav", ".mp3"]:
|
24 |
return f"❌ Unsupported file format: {ext}. Please upload .caf, .au, .opus, .amr, .alac, .aiff, .wma, .m4a, .ogg, .aac, .flac, .wav or .mp3 files."
|
25 |
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
# Gradio interface
|
30 |
interface = gr.Interface(
|
@@ -40,4 +58,4 @@ interface = gr.Interface(
|
|
40 |
)
|
41 |
|
42 |
# Launch the interface
|
43 |
-
interface.launch()
|
|
|
|
|
1 |
from transformers import pipeline
|
2 |
+
import gradio as gr
|
3 |
import os
|
4 |
+
from pydub import AudioSegment
|
5 |
+
import tempfile
|
6 |
|
7 |
# Model ID from Hugging Face
|
8 |
+
model_id = "Quantamhash/Quantum_STT"
|
9 |
|
10 |
+
# Load the speech recognition pipeline with CPU support
|
11 |
# Speech-recognition pipeline used by the transcription handler.
# The same model ID supplies both the model weights and the tokenizer.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model_id,
    tokenizer=model_id,
    # Options forwarded to generation: English, transcription task.
    generate_kwargs=dict(language="en", task="transcribe"),
    device=-1,  # Use CPU
)
|
18 |
|
19 |
+
def convert_to_wav(input_path):
    """Re-encode the audio file at ``input_path`` as a temporary WAV file.

    The input is decoded with ``AudioSegment.from_file`` and exported in
    WAV format to a freshly created temporary file whose name ends in
    ``.wav``.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        The path of the temporary WAV file. The file is created with
        ``delete=False``, so it persists after this function returns and
        the caller is responsible for removing it.

    Raises:
        Exception: Whatever ``AudioSegment.from_file`` or ``export`` raises
            is propagated unchanged; on an export failure the temporary
            file is removed first so it is not left behind.
    """
    # Decode first: if the input cannot be read, no temp file is created.
    audio = AudioSegment.from_file(input_path)

    # Reserve a unique path, then close our handle before the exporter
    # opens the same path by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
        wav_path = temp_wav.name

    try:
        exported = audio.export(wav_path, format="wav")
        # export() hands back the file object it wrote to; close it so the
        # handle is not leaked on every conversion.
        if hasattr(exported, "close"):
            exported.close()
    except Exception:
        # delete=False means nothing else will clean this up on failure.
        if os.path.exists(wav_path):
            os.remove(wav_path)
        raise

    return wav_path
|
24 |
+
|
25 |
# Transcription function with format check
|
26 |
def transcribe(audio):
|
27 |
if audio is None:
|
|
|
32 |
if ext not in [".caf", ".au", ".opus", ".amr", ".alac", ".aiff", ".wma", ".m4a", ".ogg", ".aac", ".flac", ".wav", ".mp3"]:
|
33 |
return f"❌ Unsupported file format: {ext}. Please upload .caf, .au, .opus, .amr, .alac, .aiff, .wma, .m4a, .ogg, .aac, .flac, .wav or .mp3 files."
|
34 |
|
35 |
+
# Convert to .wav if necessary
|
36 |
+
if ext != ".wav":
|
37 |
+
audio = convert_to_wav(audio)
|
38 |
+
|
39 |
+
try:
|
40 |
+
result = pipe(audio)
|
41 |
+
return result["text"]
|
42 |
+
except ValueError as e:
|
43 |
+
return f"Error processing audio file: {str(e)}"
|
44 |
+
except Exception as e:
|
45 |
+
return f"An unexpected error occurred: {str(e)}"
|
46 |
|
47 |
# Gradio interface
|
48 |
interface = gr.Interface(
|
|
|
58 |
)
|
59 |
|
60 |
# Launch the interface
|
61 |
+
interface.launch()
|