Spaces:
Running
Running
File size: 1,117 Bytes
1c817fd e83e49f 1c817fd e83e49f 1c817fd e83e49f 1c817fd e83e49f 1c817fd e83e49f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import os
import uuid
from flask import jsonify, send_file, request
from main import *
import torch
import torchaudio
def speech_to_text_func(audio_path):
    """Transcribe the audio file at ``audio_path`` using the global STT model.

    Args:
        audio_path: Path of the audio file handed to ``torchaudio.load``.

    Returns:
        ``{"text": <transcription>}`` on success, or
        ``{"error": "STT model not initialized."}`` when ``stt_model`` is
        ``None``.
    """
    # Guard: nothing can be transcribed without a loaded model.
    if stt_model is None:
        return {"error": "STT model not initialized."}

    # The sample rate reported by the loader is not used by this function.
    audio, _sample_rate = torchaudio.load(audio_path)
    if audio.ndim > 1:
        # Average over dim 0 while keeping it (presumably the channel axis
        # of the loaded audio -- confirm against the torchaudio docs).
        audio = audio.mean(dim=0, keepdim=True)
    audio = audio.to(device)

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        scores = stt_model(audio)
        best_ids = torch.argmax(scores, dim=-1)

    # Only the first entry of the prediction is decoded.
    first_sequence = best_ids[0].cpu().tolist()
    return {"text": stt_model.tokenizer.decode(first_sequence)}
def stt_api():
    """Transcribe the audio file uploaded with the current Flask request.

    The upload is read from the ``audio`` key of ``request.files``, saved to
    a uniquely named temporary ``.wav`` file in the working directory, and
    passed to ``speech_to_text_func``. The temporary file is removed
    afterwards, including when saving or transcription raises.

    Returns:
        The JSON-encoded transcription result on success;
        ``(response, 400)`` when the request has no ``audio`` file;
        ``(response, 500)`` when the transcription result contains an
        ``"error"`` key.
    """
    if 'audio' not in request.files:
        return jsonify({"error": "Audio file is required"}), 400

    audio_file = request.files['audio']
    # A random UUID keeps concurrent requests from overwriting each other.
    temp_audio_path = f"temp_audio_{uuid.uuid4()}.wav"
    try:
        audio_file.save(temp_audio_path)
        output = speech_to_text_func(temp_audio_path)
    finally:
        # Clean up even if saving or transcription raised, so failed
        # requests do not leave temp files behind. The file may not exist
        # if the save itself failed before creating it.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)

    if "error" in output:
        return jsonify({"error": output["error"]}), 500
    return jsonify(output)
|