Hhhh / stt_api.py
Hjgugugjhuhjggg's picture
Upload 28 files
e83e49f verified
raw
history blame
1.12 kB
import os
import uuid
from flask import jsonify, send_file, request
from main import *
import torch
import torchaudio
def speech_to_text_func(audio_path):
    """Transcribe the audio file at ``audio_path`` using the global STT model.

    Args:
        audio_path: Path to an audio file readable by ``torchaudio.load``.

    Returns:
        ``{"text": <transcription>}`` on success, or
        ``{"error": "STT model not initialized."}`` when ``stt_model`` is
        ``None``.
    """
    if stt_model is None:
        return {"error": "STT model not initialized."}

    # NOTE(review): the sample rate is read but never used, so no resampling
    # happens here — confirm the model accepts the file's native rate.
    audio, _sample_rate = torchaudio.load(audio_path)
    if audio.ndim > 1:
        # Average across dim 0 while keeping it as a size-1 dimension
        # (presumably the channel axis, i.e. a mono down-mix — TODO confirm).
        audio = torch.mean(audio, dim=0, keepdim=True)
    audio = audio.to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        scores = stt_model(audio)
        token_ids = torch.argmax(scores, dim=-1)

    # Decode only the first sequence of predicted ids.
    first_sequence = token_ids[0].cpu().tolist()
    return {"text": stt_model.tokenizer.decode(first_sequence)}
def stt_api():
    """Transcribe an uploaded audio file and return the result as JSON.

    Reads the uploaded file from the ``audio`` field of ``request.files``,
    saves it to a uniquely named temporary ``.wav`` path, runs
    ``speech_to_text_func`` on it, and removes the temporary file afterwards.

    Returns:
        * ``(json, 400)`` when no ``audio`` file is part of the request.
        * ``(json, 500)`` when the transcription output contains ``"error"``.
        * A JSON response with the transcription output otherwise.
    """
    if 'audio' not in request.files:
        return jsonify({"error": "Audio file is required"}), 400

    audio_file = request.files['audio']
    # Random UUID in the name keeps simultaneous uploads from colliding.
    temp_audio_path = f"temp_audio_{uuid.uuid4()}.wav"
    try:
        audio_file.save(temp_audio_path)
        output = speech_to_text_func(temp_audio_path)
    finally:
        # Clean up even when saving or transcription raises; previously a
        # failure (e.g. an undecodable upload) left the temp file on disk.
        try:
            os.remove(temp_audio_path)
        except FileNotFoundError:
            # The save failed before the file was created; nothing to remove.
            pass

    if "error" in output:
        return jsonify({"error": output["error"]}), 500
    return jsonify(output)